mirror of https://github.com/citusdata/citus.git
Merge remote-tracking branch 'origin/master' into columnar-index
Conflicts: src/test/regress/expected/columnar_empty.out src/test/regress/expected/multi_extension.out
pull/5052/head
commit
3d11c0f9ef
|
@ -64,6 +64,12 @@ jobs:
|
|||
- run:
|
||||
name: 'Check if changed'
|
||||
command: git diff --exit-code
|
||||
- run:
|
||||
name: 'Check for gitignore entries for .source files'
|
||||
command: ci/fix_gitignore.sh
|
||||
- run:
|
||||
name: 'Check if changed'
|
||||
command: git diff --exit-code
|
||||
- run:
|
||||
name: 'Check for lengths of changelog entries'
|
||||
command: ci/disallow_long_changelog_entries.sh
|
||||
|
@ -445,6 +451,12 @@ workflows:
|
|||
image_tag: '12.6'
|
||||
make: check-multi
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-multi-1'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-multi-1
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-mx'
|
||||
pg_major: 12
|
||||
|
@ -513,6 +525,12 @@ workflows:
|
|||
image_tag: '13.2'
|
||||
make: check-multi
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-multi-1'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-multi-1
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-mx'
|
||||
pg_major: 13
|
||||
|
|
84
CHANGELOG.md
84
CHANGELOG.md
|
@ -1,3 +1,87 @@
|
|||
### citus v10.1.0 (June 15, 2021) ###
|
||||
|
||||
* Drops support for PostgreSQL 11
|
||||
|
||||
* Adds `shard_count` parameter to `create_distributed_table` function
|
||||
|
||||
* Adds support for `ALTER DATABASE OWNER`
|
||||
|
||||
* Adds support for temporary columnar tables
|
||||
|
||||
* Adds support for using sequences as column default values when syncing
|
||||
metadata
|
||||
|
||||
* `alter_columnar_table_set` enforces columnar table option constraints
|
||||
|
||||
* Continues to remove shards after failure in `DropMarkedShards`
|
||||
|
||||
* Deprecates the `citus.replication_model` GUC
|
||||
|
||||
* Enables `citus.defer_drop_after_shard_move` by default
|
||||
|
||||
* Ensures free disk space before moving a shard
|
||||
|
||||
* Fetches shard size on the fly for the rebalance monitor
|
||||
|
||||
* Ignores old placements when disabling or removing a node
|
||||
|
||||
* Implements `improvement_threshold` at shard rebalancer moves
|
||||
|
||||
* Improves orphaned shard cleanup logic
|
||||
|
||||
* Improves performance of `citus_shards`
|
||||
|
||||
* Introduces `citus.local_hostname` GUC for connections to the current node
|
||||
|
||||
* Makes sure connection is closed after each shard move
|
||||
|
||||
* Makes sure that target node in shard moves is eligible for shard move
|
||||
|
||||
* Optimizes partitioned disk size calculation for shard rebalancer
|
||||
|
||||
* Prevents connection errors by properly terminating connections
|
||||
|
||||
* Prevents inheriting a distributed table
|
||||
|
||||
* Prevents users from dropping & truncating known shards
|
||||
|
||||
* Pushes down `VALUES` clause as long as not in outer part of a `JOIN`
|
||||
|
||||
* Reduces memory usage for multi-row inserts
|
||||
|
||||
* Reduces memory usage while rebalancing shards
|
||||
|
||||
* Removes length limits around partition names
|
||||
|
||||
* Executor avoids opening extra connections
|
||||
|
||||
* Fixes a bug that can cause a crash when DEBUG4 logging is enabled
|
||||
|
||||
* Fixes data race in `get_rebalance_progress`
|
||||
|
||||
* Fixes error message for local table joins
|
||||
|
||||
* Fixes `FROM ONLY` queries on partitioned tables
|
||||
|
||||
* Fixes issues caused by omitting public schema in queries
|
||||
|
||||
* Fixes nested `SELECT` query with `UNION` bug
|
||||
|
||||
* Fixes null relationName bug at parallel execution
|
||||
|
||||
* Fixes possible segfaults when using Citus in the middle of an upgrade
|
||||
|
||||
* Fixes problems with concurrent calls of `DropMarkedShards`
|
||||
|
||||
* Fixes shared dependencies that are not resident in a database
|
||||
|
||||
* Fixes stale hostnames bug in prepared statements after `master_update_node`
|
||||
|
||||
* Fixes using 2PC when it might be necessary
|
||||
|
||||
* Preserves access method of materialized views when undistributing
|
||||
or altering distributed tables
|
||||
|
||||
### citus v8.3.3 (March 23, 2021) ###
|
||||
|
||||
* Fixes a bug that leads to various issues when a connection is lost
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
[](https://slack.citusdata.com)
|
||||
[](https://docs.citusdata.com/)
|
||||
[](https://codecov.io/gh/citusdata/citus/branch/master/graph/badge.svg)
|
||||
[](https://app.codecov.io/gh/citusdata/citus)
|
||||
|
||||
## What is Citus?
|
||||
|
||||
|
|
16
ci/README.md
16
ci/README.md
|
@ -156,9 +156,9 @@ git merge "community/$PR_BRANCH"
|
|||
familiar with the change.
|
||||
5. You should rerun the `check-merge-to-enterprise` check on
|
||||
`community/$PR_BRANCH`. You can use re-run from failed option in circle CI.
|
||||
6. You can now merge the PR on community. Be sure to NOT use "squash and merge",
|
||||
6. You can now merge the PR on enterprise. Be sure to NOT use "squash and merge",
|
||||
but instead use the regular "merge commit" mode.
|
||||
7. You can now merge the PR on enterprise. Be sure to NOT use "squash and merge",
|
||||
7. You can now merge the PR on community. Be sure to NOT use "squash and merge",
|
||||
but instead use the regular "merge commit" mode.
|
||||
|
||||
The subsequent PRs on community will be able to pass the
|
||||
|
@ -346,3 +346,15 @@ foo = 2
|
|||
#endif
|
||||
```
|
||||
This was deemed to be error prone and not worth the effort.
|
||||
|
||||
## `fix_gitignore.sh`
|
||||
|
||||
This script checks and fixes issues with `.gitignore` rules:
|
||||
|
||||
1. Makes sure git ignores the `.sql` files and expected output files that are generated
|
||||
from `.source` template files. If you created or deleted a `.source` file in a commit,
|
||||
git ignore rules should be updated to reflect this change.
|
||||
|
||||
2. Makes sure we do not commit any generated files that should be ignored. If there is an
|
||||
ignored file in the git tree, the user is expected to review the files that are removed
|
||||
from the git tree and commit them.
|
||||
|
|
|
@ -65,6 +65,14 @@ fi
|
|||
# undo partial merge
|
||||
git merge --abort
|
||||
|
||||
# If we have a conflict on enterprise merge on the master branch, we have a problem.
|
||||
# Provide an error message to indicate that enterprise merge is needed.
|
||||
if [[ $PR_BRANCH = master ]]; then
|
||||
echo "ERROR: Master branch has merge conflicts with enterprise-master."
|
||||
echo "Try re-running this job if you merged community PR before enterprise PR. Otherwise conflicts need to be resolved as a separate PR on enterprise."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! git fetch enterprise "$PR_BRANCH" ; then
|
||||
echo "ERROR: enterprise/$PR_BRANCH was not found and community PR branch could not be merged into enterprise-master"
|
||||
exit 1
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#! /bin/bash
|
||||
# shellcheck disable=SC2012
|
||||
|
||||
set -euo pipefail
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
# We list all the .source files in alphabetical order, and do a substitution
|
||||
# before writing the resulting file names that are created by those templates in
|
||||
# relevant .gitignore files
|
||||
#
|
||||
# 1. Capture the file name without the .source extension
|
||||
# 2. Add the desired extension at the end
|
||||
# 3. Add a / character at the beginning of each line to conform to .gitignore file format
|
||||
#
|
||||
# e.g. multi_copy.source -> /multi_copy.sql
|
||||
ls -1 src/test/regress/input | sed -E "s#(.*)\.source#/\1.sql#" > src/test/regress/sql/.gitignore
|
||||
|
||||
# e.g. multi_copy.source -> /multi_copy.out
|
||||
ls -1 src/test/regress/output | sed -E "s#(.*)\.source#/\1.out#" > src/test/regress/expected/.gitignore
|
||||
|
||||
# Remove all the ignored files from git tree, and error out
|
||||
# find all ignored files in git tree, and use quotation marks to prevent word splitting on filenames with spaces in them
|
||||
ignored_lines_in_git_tree=$(git ls-files --ignored --exclude-standard | sed 's/.*/"&"/')
|
||||
|
||||
if [[ -n $ignored_lines_in_git_tree ]]
|
||||
then
|
||||
echo "Ignored files should not be in git tree!"
|
||||
echo "${ignored_lines_in_git_tree}"
|
||||
|
||||
echo "Removing these files from git tree, please review and commit"
|
||||
echo "$ignored_lines_in_git_tree" | xargs git rm -r --cached
|
||||
exit 1
|
||||
fi
|
|
@ -14,3 +14,4 @@ ci/remove_useless_declarations.sh
|
|||
ci/disallow_c_comments_in_migrations.sh
|
||||
ci/disallow_long_changelog_entries.sh
|
||||
ci/normalize_expected.sh
|
||||
ci/fix_gitignore.sh
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for Citus 10.1devel.
|
||||
# Generated by GNU Autoconf 2.69 for Citus 10.2devel.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
|
@ -579,8 +579,8 @@ MAKEFLAGS=
|
|||
# Identity of this package.
|
||||
PACKAGE_NAME='Citus'
|
||||
PACKAGE_TARNAME='citus'
|
||||
PACKAGE_VERSION='10.1devel'
|
||||
PACKAGE_STRING='Citus 10.1devel'
|
||||
PACKAGE_VERSION='10.2devel'
|
||||
PACKAGE_STRING='Citus 10.2devel'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
|
@ -1260,7 +1260,7 @@ if test "$ac_init_help" = "long"; then
|
|||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures Citus 10.1devel to adapt to many kinds of systems.
|
||||
\`configure' configures Citus 10.2devel to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
|
@ -1322,7 +1322,7 @@ fi
|
|||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of Citus 10.1devel:";;
|
||||
short | recursive ) echo "Configuration of Citus 10.2devel:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
|
@ -1425,7 +1425,7 @@ fi
|
|||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
Citus configure 10.1devel
|
||||
Citus configure 10.2devel
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
|
@ -1908,7 +1908,7 @@ cat >config.log <<_ACEOF
|
|||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by Citus $as_me 10.1devel, which was
|
||||
It was created by Citus $as_me 10.2devel, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
|
@ -5356,7 +5356,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by Citus $as_me 10.1devel, which was
|
||||
This file was extended by Citus $as_me 10.2devel, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
|
@ -5418,7 +5418,7 @@ _ACEOF
|
|||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
Citus config.status 10.1devel
|
||||
Citus config.status 10.2devel
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
# everyone needing autoconf installed, the resulting files are checked
|
||||
# into the SCM.
|
||||
|
||||
AC_INIT([Citus], [10.1devel])
|
||||
AC_INIT([Citus], [10.2devel])
|
||||
AC_COPYRIGHT([Copyright (c) Citus Data, Inc.])
|
||||
|
||||
# we'll need sed and awk for some of the version commands
|
||||
|
|
|
@ -2076,6 +2076,15 @@ alter_columnar_table_set(PG_FUNCTION_ARGS)
|
|||
if (!PG_ARGISNULL(1))
|
||||
{
|
||||
options.chunkRowCount = PG_GETARG_INT32(1);
|
||||
if (options.chunkRowCount < CHUNK_ROW_COUNT_MINIMUM ||
|
||||
options.chunkRowCount > CHUNK_ROW_COUNT_MAXIMUM)
|
||||
{
|
||||
ereport(ERROR, (errmsg("chunk group row count limit out of range"),
|
||||
errhint("chunk group row count limit must be between "
|
||||
UINT64_FORMAT " and " UINT64_FORMAT,
|
||||
(uint64) CHUNK_ROW_COUNT_MINIMUM,
|
||||
(uint64) CHUNK_ROW_COUNT_MAXIMUM)));
|
||||
}
|
||||
ereport(DEBUG1,
|
||||
(errmsg("updating chunk row count to %d", options.chunkRowCount)));
|
||||
}
|
||||
|
@ -2084,6 +2093,15 @@ alter_columnar_table_set(PG_FUNCTION_ARGS)
|
|||
if (!PG_ARGISNULL(2))
|
||||
{
|
||||
options.stripeRowCount = PG_GETARG_INT32(2);
|
||||
if (options.stripeRowCount < STRIPE_ROW_COUNT_MINIMUM ||
|
||||
options.stripeRowCount > STRIPE_ROW_COUNT_MAXIMUM)
|
||||
{
|
||||
ereport(ERROR, (errmsg("stripe row count limit out of range"),
|
||||
errhint("stripe row count limit must be between "
|
||||
UINT64_FORMAT " and " UINT64_FORMAT,
|
||||
(uint64) STRIPE_ROW_COUNT_MINIMUM,
|
||||
(uint64) STRIPE_ROW_COUNT_MAXIMUM)));
|
||||
}
|
||||
ereport(DEBUG1, (errmsg(
|
||||
"updating stripe row count to " UINT64_FORMAT,
|
||||
options.stripeRowCount)));
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Citus extension
|
||||
comment = 'Citus distributed database'
|
||||
default_version = '10.1-1'
|
||||
default_version = '10.2-1'
|
||||
module_pathname = '$libdir/citus'
|
||||
relocatable = false
|
||||
schema = pg_catalog
|
||||
|
|
|
@ -204,7 +204,6 @@ static char * GetAccessMethodForMatViewIfExists(Oid viewOid);
|
|||
static bool WillRecreateForeignKeyToReferenceTable(Oid relationId,
|
||||
CascadeToColocatedOption cascadeOption);
|
||||
static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId);
|
||||
static void ExecuteQueryViaSPI(char *query, int SPIOK);
|
||||
|
||||
PG_FUNCTION_INFO_V1(undistribute_table);
|
||||
PG_FUNCTION_INFO_V1(alter_distributed_table);
|
||||
|
@ -219,11 +218,11 @@ PG_FUNCTION_INFO_V1(worker_change_sequence_dependency);
|
|||
Datum
|
||||
undistribute_table(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
bool cascadeViaForeignKeys = PG_GETARG_BOOL(1);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TableConversionParameters params = {
|
||||
.relationId = relationId,
|
||||
.cascadeViaForeignKeys = cascadeViaForeignKeys
|
||||
|
@ -243,6 +242,8 @@ undistribute_table(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
alter_distributed_table(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
char *distributionColumn = NULL;
|
||||
|
@ -280,9 +281,6 @@ alter_distributed_table(PG_FUNCTION_ARGS)
|
|||
}
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
|
||||
TableConversionParameters params = {
|
||||
.relationId = relationId,
|
||||
.distributionColumn = distributionColumn,
|
||||
|
@ -305,13 +303,13 @@ alter_distributed_table(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
alter_table_set_access_method(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
text *accessMethodText = PG_GETARG_TEXT_P(1);
|
||||
char *accessMethod = text_to_cstring(accessMethodText);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TableConversionParameters params = {
|
||||
.relationId = relationId,
|
||||
.accessMethod = accessMethod
|
||||
|
@ -558,8 +556,11 @@ ConvertTable(TableConversionState *con)
|
|||
|
||||
includeIndexes = false;
|
||||
}
|
||||
|
||||
bool includeReplicaIdentity = true;
|
||||
List *postLoadCommands = GetPostLoadTableCreationCommands(con->relationId,
|
||||
includeIndexes);
|
||||
includeIndexes,
|
||||
includeReplicaIdentity);
|
||||
List *justBeforeDropCommands = NIL;
|
||||
List *attachPartitionCommands = NIL;
|
||||
|
||||
|
|
|
@ -179,7 +179,7 @@ remove_local_tables_from_metadata(PG_FUNCTION_ARGS)
|
|||
* properties:
|
||||
* - it will have only one shard,
|
||||
* - its distribution method will be DISTRIBUTE_BY_NONE,
|
||||
* - its replication model will be ReplicationModel,
|
||||
* - its replication model will be REPLICATION_MODEL_STREAMING,
|
||||
* - its replication factor will be set to 1.
|
||||
* Similar to reference tables, it has only 1 placement. In addition to that, that
|
||||
* single placement is only allowed to be on the coordinator.
|
||||
|
@ -996,9 +996,7 @@ InsertMetadataForCitusLocalTable(Oid citusLocalTableId, uint64 shardId)
|
|||
Assert(shardId != INVALID_SHARD_ID);
|
||||
|
||||
char distributionMethod = DISTRIBUTE_BY_NONE;
|
||||
char replicationModel = ReplicationModel;
|
||||
|
||||
Assert(replicationModel != REPLICATION_MODEL_2PC);
|
||||
char replicationModel = REPLICATION_MODEL_STREAMING;
|
||||
|
||||
uint32 colocationId = INVALID_COLOCATION_ID;
|
||||
Var *distributionColumn = NULL;
|
||||
|
|
|
@ -86,13 +86,9 @@
|
|||
*/
|
||||
#define LOG_PER_TUPLE_AMOUNT 1000000
|
||||
|
||||
|
||||
/* Replication model to use when creating distributed tables */
|
||||
int ReplicationModel = REPLICATION_MODEL_COORDINATOR;
|
||||
|
||||
|
||||
/* local function forward declarations */
|
||||
static char DecideReplicationModel(char distributionMethod, bool viaDeprecatedAPI);
|
||||
static char DecideReplicationModel(char distributionMethod, char *colocateWithTableName,
|
||||
bool viaDeprecatedAPI);
|
||||
static void CreateHashDistributedTableShards(Oid relationId, int shardCount,
|
||||
Oid colocatedTableId, bool localTableEmpty);
|
||||
static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
|
||||
|
@ -119,7 +115,6 @@ static List * GetFKeyCreationCommandsRelationInvolvedWithTableType(Oid relationI
|
|||
int tableTypeFlag);
|
||||
static Oid DropFKeysAndUndistributeTable(Oid relationId);
|
||||
static void DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag);
|
||||
static bool LocalTableEmpty(Oid tableId);
|
||||
static void CopyLocalDataIntoShards(Oid relationId);
|
||||
static List * TupleDescColumnNameList(TupleDesc tupleDescriptor);
|
||||
static bool DistributionColumnUsesGeneratedStoredColumn(TupleDesc relationDesc,
|
||||
|
@ -146,12 +141,11 @@ PG_FUNCTION_INFO_V1(create_reference_table);
|
|||
Datum
|
||||
master_create_distributed_table(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
text *distributionColumnText = PG_GETARG_TEXT_P(1);
|
||||
Oid distributionMethodOid = PG_GETARG_OID(2);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
EnsureCitusTableCanBeCreated(relationId);
|
||||
|
||||
char *colocateWithTableName = NULL;
|
||||
|
@ -193,6 +187,8 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
create_distributed_table(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2) || PG_ARGISNULL(3))
|
||||
{
|
||||
PG_RETURN_VOID();
|
||||
|
@ -225,8 +221,6 @@ create_distributed_table(PG_FUNCTION_ARGS)
|
|||
shardCountIsStrict = true;
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
EnsureCitusTableCanBeCreated(relationId);
|
||||
|
||||
/* enable create_distributed_table on an empty node */
|
||||
|
@ -275,6 +269,7 @@ create_distributed_table(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
create_reference_table(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
char *colocateWithTableName = NULL;
|
||||
|
@ -282,8 +277,6 @@ create_reference_table(PG_FUNCTION_ARGS)
|
|||
|
||||
bool viaDeprecatedAPI = false;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
EnsureCitusTableCanBeCreated(relationId);
|
||||
|
||||
/* enable create_reference_table on an empty node */
|
||||
|
@ -442,6 +435,7 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio
|
|||
EnsureDependenciesExistOnAllNodes(&tableAddress);
|
||||
|
||||
char replicationModel = DecideReplicationModel(distributionMethod,
|
||||
colocateWithTableName,
|
||||
viaDeprecatedAPI);
|
||||
|
||||
/*
|
||||
|
@ -464,7 +458,7 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio
|
|||
EnsureReferenceTablesExistOnAllNodes();
|
||||
|
||||
/* we need to calculate these variables before creating distributed metadata */
|
||||
bool localTableEmpty = LocalTableEmpty(relationId);
|
||||
bool localTableEmpty = TableEmpty(relationId);
|
||||
Oid colocatedTableId = ColocatedTableId(colocationId);
|
||||
|
||||
/* create an entry for distributed table in pg_dist_partition */
|
||||
|
@ -631,44 +625,38 @@ DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
|
|||
|
||||
/*
|
||||
* DecideReplicationModel function decides which replication model should be
|
||||
* used depending on given distribution configuration and global ReplicationModel
|
||||
* variable. If ReplicationModel conflicts with distribution configuration, this
|
||||
* function errors out.
|
||||
* used depending on given distribution configuration.
|
||||
*/
|
||||
static char
|
||||
DecideReplicationModel(char distributionMethod, bool viaDeprecatedAPI)
|
||||
DecideReplicationModel(char distributionMethod, char *colocateWithTableName, bool
|
||||
viaDeprecatedAPI)
|
||||
{
|
||||
if (viaDeprecatedAPI)
|
||||
{
|
||||
if (ReplicationModel != REPLICATION_MODEL_COORDINATOR)
|
||||
{
|
||||
ereport(NOTICE, (errmsg("using statement-based replication"),
|
||||
errdetail("The current replication_model setting is "
|
||||
"'streaming', which is not supported by "
|
||||
"master_create_distributed_table."),
|
||||
errhint("Use create_distributed_table to use the streaming "
|
||||
"replication model.")));
|
||||
}
|
||||
|
||||
return REPLICATION_MODEL_COORDINATOR;
|
||||
}
|
||||
else if (distributionMethod == DISTRIBUTE_BY_NONE)
|
||||
{
|
||||
return REPLICATION_MODEL_2PC;
|
||||
}
|
||||
else if (distributionMethod == DISTRIBUTE_BY_HASH)
|
||||
else if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 &&
|
||||
!IsColocateWithNone(colocateWithTableName))
|
||||
{
|
||||
return ReplicationModel;
|
||||
text *colocateWithTableNameText = cstring_to_text(colocateWithTableName);
|
||||
Oid colocatedRelationId = ResolveRelationId(colocateWithTableNameText, false);
|
||||
CitusTableCacheEntry *targetTableEntry = GetCitusTableCacheEntry(
|
||||
colocatedRelationId);
|
||||
char replicationModel = targetTableEntry->replicationModel;
|
||||
|
||||
return replicationModel;
|
||||
}
|
||||
else if (distributionMethod == DISTRIBUTE_BY_HASH &&
|
||||
!DistributedTableReplicationIsEnabled())
|
||||
{
|
||||
return REPLICATION_MODEL_STREAMING;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ReplicationModel != REPLICATION_MODEL_COORDINATOR)
|
||||
{
|
||||
ereport(NOTICE, (errmsg("using statement-based replication"),
|
||||
errdetail("Streaming replication is supported only for "
|
||||
"hash-distributed tables.")));
|
||||
}
|
||||
|
||||
return REPLICATION_MODEL_COORDINATOR;
|
||||
}
|
||||
|
||||
|
@ -863,7 +851,6 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
|
|||
|
||||
EnsureTableNotDistributed(relationId);
|
||||
EnsureLocalTableEmptyIfNecessary(relationId, distributionMethod, viaDeprecatedAPI);
|
||||
EnsureReplicationSettings(InvalidOid, replicationModel);
|
||||
EnsureRelationHasNoTriggers(relationId);
|
||||
|
||||
/* we assume callers took necessary locks */
|
||||
|
@ -1125,7 +1112,7 @@ static void
|
|||
EnsureLocalTableEmpty(Oid relationId)
|
||||
{
|
||||
char *relationName = get_rel_name(relationId);
|
||||
bool localTableEmpty = LocalTableEmpty(relationId);
|
||||
bool localTableEmpty = TableEmpty(relationId);
|
||||
|
||||
if (!localTableEmpty)
|
||||
{
|
||||
|
@ -1156,36 +1143,6 @@ EnsureTableNotDistributed(Oid relationId)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* EnsureReplicationSettings checks whether the current replication factor
|
||||
* setting is compatible with the replication model. This function errors
|
||||
* out if caller tries to use streaming replication with more than one
|
||||
* replication factor.
|
||||
*/
|
||||
void
|
||||
EnsureReplicationSettings(Oid relationId, char replicationModel)
|
||||
{
|
||||
char *msgSuffix = "the streaming replication model";
|
||||
char *extraHint = " or setting \"citus.replication_model\" to \"statement\"";
|
||||
|
||||
if (relationId != InvalidOid)
|
||||
{
|
||||
msgSuffix = "tables which use the streaming replication model";
|
||||
extraHint = "";
|
||||
}
|
||||
|
||||
if (replicationModel == REPLICATION_MODEL_STREAMING &&
|
||||
DistributedTableReplicationIsEnabled())
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("replication factors above one are incompatible with %s",
|
||||
msgSuffix),
|
||||
errhint("Try again after reducing \"citus.shard_replication_"
|
||||
"factor\" to one%s.", extraHint)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EnsureRelationHasNoTriggers errors out if the given table has triggers on
|
||||
* it. See also GetExplicitTriggerIdList function's comment for the triggers this
|
||||
|
@ -1291,50 +1248,40 @@ SupportFunctionForColumn(Var *partitionColumn, Oid accessMethodId,
|
|||
|
||||
|
||||
/*
|
||||
* LocalTableEmpty function checks whether given local table contains any row and
|
||||
* returns false if there is any data. This function is only for local tables and
|
||||
* should not be called for distributed tables.
|
||||
* TableEmpty function checks whether given table contains any row and
|
||||
* returns false if there is any data.
|
||||
*/
|
||||
static bool
|
||||
LocalTableEmpty(Oid tableId)
|
||||
bool
|
||||
TableEmpty(Oid tableId)
|
||||
{
|
||||
Oid schemaId = get_rel_namespace(tableId);
|
||||
char *schemaName = get_namespace_name(schemaId);
|
||||
char *tableName = get_rel_name(tableId);
|
||||
char *tableQualifiedName = quote_qualified_identifier(schemaName, tableName);
|
||||
|
||||
StringInfo selectExistQueryString = makeStringInfo();
|
||||
StringInfo selectTrueQueryString = makeStringInfo();
|
||||
|
||||
bool columnNull = false;
|
||||
bool readOnly = true;
|
||||
|
||||
int rowId = 0;
|
||||
int attributeId = 1;
|
||||
|
||||
AssertArg(!IsCitusTable(tableId));
|
||||
|
||||
int spiConnectionResult = SPI_connect();
|
||||
if (spiConnectionResult != SPI_OK_CONNECT)
|
||||
{
|
||||
ereport(ERROR, (errmsg("could not connect to SPI manager")));
|
||||
}
|
||||
|
||||
appendStringInfo(selectExistQueryString, SELECT_EXIST_QUERY, tableQualifiedName);
|
||||
appendStringInfo(selectTrueQueryString, SELECT_TRUE_QUERY, tableQualifiedName);
|
||||
|
||||
int spiQueryResult = SPI_execute(selectExistQueryString->data, readOnly, 0);
|
||||
int spiQueryResult = SPI_execute(selectTrueQueryString->data, readOnly, 0);
|
||||
if (spiQueryResult != SPI_OK_SELECT)
|
||||
{
|
||||
ereport(ERROR, (errmsg("execution was not successful \"%s\"",
|
||||
selectExistQueryString->data)));
|
||||
selectTrueQueryString->data)));
|
||||
}
|
||||
|
||||
/* we expect that SELECT EXISTS query will return single value in a single row */
|
||||
Assert(SPI_processed == 1);
|
||||
/* we expect that SELECT TRUE query will return single value in a single row OR empty set */
|
||||
Assert(SPI_processed == 1 || SPI_processed == 0);
|
||||
|
||||
HeapTuple tuple = SPI_tuptable->vals[rowId];
|
||||
Datum hasDataDatum = SPI_getbinval(tuple, SPI_tuptable->tupdesc, attributeId,
|
||||
&columnNull);
|
||||
bool localTableEmpty = !DatumGetBool(hasDataDatum);
|
||||
bool localTableEmpty = !SPI_processed;
|
||||
|
||||
SPI_finish();
|
||||
|
||||
|
|
|
@ -0,0 +1,215 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* database.c
|
||||
* Commands to interact with the database object in a distributed
|
||||
* environment.
|
||||
*
|
||||
* Copyright (c) Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/htup_details.h"
|
||||
#include "access/xact.h"
|
||||
#include "catalog/objectaddress.h"
|
||||
#include "catalog/pg_database.h"
|
||||
#include "commands/dbcommands.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/parsenodes.h"
|
||||
#include "utils/syscache.h"
|
||||
|
||||
#include "distributed/commands.h"
|
||||
#include "distributed/commands/utility_hook.h"
|
||||
#include "distributed/deparser.h"
|
||||
#include "distributed/metadata_sync.h"
|
||||
#include "distributed/metadata_utility.h"
|
||||
#include "distributed/multi_executor.h"
|
||||
#include "distributed/relation_access_tracking.h"
|
||||
#include "distributed/worker_transaction.h"
|
||||
|
||||
static void EnsureSequentialModeForDatabaseDDL(void);
|
||||
static AlterOwnerStmt * RecreateAlterDatabaseOwnerStmt(Oid databaseOid);
|
||||
static Oid get_database_owner(Oid db_oid);
|
||||
|
||||
/* controlled via GUC */
|
||||
bool EnableAlterDatabaseOwner = false;
|
||||
|
||||
|
||||
/*
|
||||
* PreprocessAlterDatabaseOwnerStmt is called during the utility hook before the alter
|
||||
* command is applied locally on the coordinator. This will verify if the command needs to
|
||||
* be propagated to the workers and if so prepares a list of ddl commands to execute.
|
||||
*/
|
||||
List *
|
||||
PreprocessAlterDatabaseOwnerStmt(Node *node, const char *queryString,
|
||||
ProcessUtilityContext processUtilityContext)
|
||||
{
|
||||
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
|
||||
Assert(stmt->objectType == OBJECT_DATABASE);
|
||||
|
||||
ObjectAddress typeAddress = GetObjectAddressFromParseTree((Node *) stmt, false);
|
||||
if (!ShouldPropagateObject(&typeAddress))
|
||||
{
|
||||
return NIL;
|
||||
}
|
||||
|
||||
if (!EnableAlterDatabaseOwner)
|
||||
{
|
||||
/* don't propagate if GUC is turned off */
|
||||
return NIL;
|
||||
}
|
||||
|
||||
EnsureCoordinator();
|
||||
|
||||
QualifyTreeNode((Node *) stmt);
|
||||
const char *sql = DeparseTreeNode((Node *) stmt);
|
||||
|
||||
EnsureSequentialModeForDatabaseDDL();
|
||||
List *commands = list_make3(DISABLE_DDL_PROPAGATION,
|
||||
(void *) sql,
|
||||
ENABLE_DDL_PROPAGATION);
|
||||
|
||||
return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PostprocessAlterDatabaseOwnerStmt is called during the utility hook after the alter
|
||||
* database command has been applied locally.
|
||||
*
|
||||
* Its main purpose is to propagate the newly formed dependencies onto the nodes before
|
||||
* applying the change of owner of the database. This ensures, for systems that have role
|
||||
* management, that the roles will be created before applying the alter owner command.
|
||||
*/
|
||||
List *
|
||||
PostprocessAlterDatabaseOwnerStmt(Node *node, const char *queryString)
|
||||
{
|
||||
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
|
||||
Assert(stmt->objectType == OBJECT_DATABASE);
|
||||
|
||||
ObjectAddress typeAddress = GetObjectAddressFromParseTree((Node *) stmt, false);
|
||||
if (!ShouldPropagateObject(&typeAddress))
|
||||
{
|
||||
return NIL;
|
||||
}
|
||||
|
||||
if (!EnableAlterDatabaseOwner)
|
||||
{
|
||||
/* don't propagate if GUC is turned off */
|
||||
return NIL;
|
||||
}
|
||||
|
||||
EnsureDependenciesExistOnAllNodes(&typeAddress);
|
||||
return NIL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AlterDatabaseOwnerObjectAddress returns the ObjectAddress of the database that is the
|
||||
* object of the AlterOwnerStmt. Errors if missing_ok is false.
|
||||
*/
|
||||
ObjectAddress
|
||||
AlterDatabaseOwnerObjectAddress(Node *node, bool missing_ok)
|
||||
{
|
||||
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
|
||||
Assert(stmt->objectType == OBJECT_DATABASE);
|
||||
|
||||
Oid databaseOid = get_database_oid(strVal((Value *) stmt->object), missing_ok);
|
||||
ObjectAddress address = { 0 };
|
||||
ObjectAddressSet(address, DatabaseRelationId, databaseOid);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DatabaseOwnerDDLCommands returns a list of sql statements to idempotently apply a
|
||||
* change of the database owner on the workers so that the database is owned by the same
|
||||
* user on all nodes in the cluster.
|
||||
*/
|
||||
List *
|
||||
DatabaseOwnerDDLCommands(const ObjectAddress *address)
|
||||
{
|
||||
Node *stmt = (Node *) RecreateAlterDatabaseOwnerStmt(address->objectId);
|
||||
return list_make1(DeparseTreeNode(stmt));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RecreateAlterDatabaseOwnerStmt creates an AlterOwnerStmt that represents the operation
|
||||
* of changing the owner of the database to its current owner.
|
||||
*/
|
||||
static AlterOwnerStmt *
|
||||
RecreateAlterDatabaseOwnerStmt(Oid databaseOid)
|
||||
{
|
||||
AlterOwnerStmt *stmt = makeNode(AlterOwnerStmt);
|
||||
|
||||
stmt->objectType = OBJECT_DATABASE;
|
||||
stmt->object = (Node *) makeString(get_database_name(databaseOid));
|
||||
|
||||
Oid ownerOid = get_database_owner(databaseOid);
|
||||
stmt->newowner = makeNode(RoleSpec);
|
||||
stmt->newowner->roletype = ROLESPEC_CSTRING;
|
||||
stmt->newowner->rolename = GetUserNameFromId(ownerOid, false);
|
||||
|
||||
return stmt;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* get_database_owner returns the Oid of the role owning the database
|
||||
*/
|
||||
static Oid
|
||||
get_database_owner(Oid db_oid)
|
||||
{
|
||||
HeapTuple tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_oid));
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE),
|
||||
errmsg("database with OID %u does not exist", db_oid)));
|
||||
}
|
||||
|
||||
Oid dba = ((Form_pg_database) GETSTRUCT(tuple))->datdba;
|
||||
|
||||
ReleaseSysCache(tuple);
|
||||
|
||||
return dba;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EnsureSequentialModeForDatabaseDDL makes sure that the current transaction is already
|
||||
* in sequential mode, or can still safely be put in sequential mode, it errors if that is
|
||||
* not possible. The error contains information for the user to retry the transaction with
|
||||
* sequential mode set from the beginning.
|
||||
*/
|
||||
static void
|
||||
EnsureSequentialModeForDatabaseDDL(void)
|
||||
{
|
||||
if (!IsTransactionBlock())
|
||||
{
|
||||
/* we do not need to switch to sequential mode if we are not in a transaction */
|
||||
return;
|
||||
}
|
||||
|
||||
if (ParallelQueryExecutedInTransaction())
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot create or modify database because there was a "
|
||||
"parallel operation on a distributed table in the "
|
||||
"transaction"),
|
||||
errdetail("When creating or altering a database, Citus needs to "
|
||||
"perform all operations over a single connection per "
|
||||
"node to ensure consistency."),
|
||||
errhint("Try re-running the transaction with "
|
||||
"\"SET LOCAL citus.multi_shard_modify_mode TO "
|
||||
"\'sequential\';\"")));
|
||||
}
|
||||
|
||||
ereport(DEBUG1, (errmsg("switching to sequential query execution mode"),
|
||||
errdetail("Database is created or altered. To make sure subsequent "
|
||||
"commands see the type correctly we need to make sure to "
|
||||
"use only one connection for all future commands")));
|
||||
SetLocalMultiShardModifyModeToSequential();
|
||||
}
|
|
@ -14,6 +14,7 @@
|
|||
#include "catalog/objectaddress.h"
|
||||
#include "commands/extension.h"
|
||||
#include "distributed/commands.h"
|
||||
#include "distributed/commands/utility_hook.h"
|
||||
#include "distributed/connection_management.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/metadata/dependency.h"
|
||||
|
@ -191,6 +192,20 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
|
|||
return CreateCollationDDLsIdempotent(dependency->objectId);
|
||||
}
|
||||
|
||||
case OCLASS_DATABASE:
|
||||
{
|
||||
List *databaseDDLCommands = NIL;
|
||||
|
||||
/* only propagate the ownership of the database when the feature is on */
|
||||
if (EnableAlterDatabaseOwner)
|
||||
{
|
||||
List *ownerDDLCommands = DatabaseOwnerDDLCommands(dependency);
|
||||
databaseDDLCommands = list_concat(databaseDDLCommands, ownerDDLCommands);
|
||||
}
|
||||
|
||||
return databaseDDLCommands;
|
||||
}
|
||||
|
||||
case OCLASS_PROC:
|
||||
{
|
||||
return CreateFunctionDDLCommandsIdempotent(dependency);
|
||||
|
|
|
@ -240,6 +240,13 @@ static DistributeObjectOps Collation_Rename = {
|
|||
.postprocess = NULL,
|
||||
.address = RenameCollationStmtObjectAddress,
|
||||
};
|
||||
static DistributeObjectOps Database_AlterOwner = {
|
||||
.deparse = DeparseAlterDatabaseOwnerStmt,
|
||||
.qualify = NULL,
|
||||
.preprocess = PreprocessAlterDatabaseOwnerStmt,
|
||||
.postprocess = PostprocessAlterDatabaseOwnerStmt,
|
||||
.address = AlterDatabaseOwnerObjectAddress,
|
||||
};
|
||||
static DistributeObjectOps Extension_AlterObjectSchema = {
|
||||
.deparse = DeparseAlterExtensionSchemaStmt,
|
||||
.qualify = NULL,
|
||||
|
@ -359,6 +366,34 @@ static DistributeObjectOps Routine_AlterObjectDepends = {
|
|||
.postprocess = NULL,
|
||||
.address = AlterFunctionDependsStmtObjectAddress,
|
||||
};
|
||||
static DistributeObjectOps Sequence_Alter = {
|
||||
.deparse = NULL,
|
||||
.qualify = NULL,
|
||||
.preprocess = PreprocessAlterSequenceStmt,
|
||||
.postprocess = NULL,
|
||||
.address = AlterSequenceObjectAddress,
|
||||
};
|
||||
static DistributeObjectOps Sequence_AlterObjectSchema = {
|
||||
.deparse = NULL,
|
||||
.qualify = NULL,
|
||||
.preprocess = PreprocessAlterSequenceSchemaStmt,
|
||||
.postprocess = NULL,
|
||||
.address = AlterSequenceSchemaStmtObjectAddress,
|
||||
};
|
||||
static DistributeObjectOps Sequence_Drop = {
|
||||
.deparse = DeparseDropSequenceStmt,
|
||||
.qualify = NULL,
|
||||
.preprocess = PreprocessDropSequenceStmt,
|
||||
.postprocess = NULL,
|
||||
.address = NULL,
|
||||
};
|
||||
static DistributeObjectOps Sequence_Rename = {
|
||||
.deparse = DeparseRenameSequenceStmt,
|
||||
.qualify = QualifyRenameSequenceStmt,
|
||||
.preprocess = PreprocessRenameSequenceStmt,
|
||||
.postprocess = NULL,
|
||||
.address = RenameSequenceStmtObjectAddress,
|
||||
};
|
||||
static DistributeObjectOps Trigger_AlterObjectDepends = {
|
||||
.deparse = NULL,
|
||||
.qualify = NULL,
|
||||
|
@ -453,7 +488,7 @@ static DistributeObjectOps Statistics_Rename = {
|
|||
.address = NULL,
|
||||
};
|
||||
static DistributeObjectOps Table_AlterTable = {
|
||||
.deparse = NULL,
|
||||
.deparse = DeparseAlterTableStmt,
|
||||
.qualify = NULL,
|
||||
.preprocess = PreprocessAlterTableStmt,
|
||||
.postprocess = NULL,
|
||||
|
@ -621,6 +656,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Routine_AlterObjectSchema;
|
||||
}
|
||||
|
||||
case OBJECT_SEQUENCE:
|
||||
{
|
||||
return &Sequence_AlterObjectSchema;
|
||||
}
|
||||
|
||||
case OBJECT_STATISTIC_EXT:
|
||||
{
|
||||
return &Statistics_AlterObjectSchema;
|
||||
|
@ -658,6 +698,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Collation_AlterOwner;
|
||||
}
|
||||
|
||||
case OBJECT_DATABASE:
|
||||
{
|
||||
return &Database_AlterOwner;
|
||||
}
|
||||
|
||||
case OBJECT_FUNCTION:
|
||||
{
|
||||
return &Function_AlterOwner;
|
||||
|
@ -705,6 +750,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Any_AlterRoleSet;
|
||||
}
|
||||
|
||||
case T_AlterSeqStmt:
|
||||
{
|
||||
return &Sequence_Alter;
|
||||
}
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_13
|
||||
case T_AlterStatsStmt:
|
||||
{
|
||||
|
@ -861,6 +911,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Schema_Drop;
|
||||
}
|
||||
|
||||
case OBJECT_SEQUENCE:
|
||||
{
|
||||
return &Sequence_Drop;
|
||||
}
|
||||
|
||||
case OBJECT_STATISTIC_EXT:
|
||||
{
|
||||
return &Statistics_Drop;
|
||||
|
@ -955,6 +1010,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Schema_Rename;
|
||||
}
|
||||
|
||||
case OBJECT_SEQUENCE:
|
||||
{
|
||||
return &Sequence_Rename;
|
||||
}
|
||||
|
||||
case OBJECT_STATISTIC_EXT:
|
||||
{
|
||||
return &Statistics_Rename;
|
||||
|
|
|
@ -59,6 +59,8 @@ master_drop_distributed_table_metadata(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
master_remove_partition_metadata(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
text *schemaNameText = PG_GETARG_TEXT_P(1);
|
||||
text *tableNameText = PG_GETARG_TEXT_P(2);
|
||||
|
@ -66,8 +68,6 @@ master_remove_partition_metadata(PG_FUNCTION_ARGS)
|
|||
char *schemaName = text_to_cstring(schemaNameText);
|
||||
char *tableName = text_to_cstring(tableNameText);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/*
|
||||
* The SQL_DROP trigger calls this function even for tables that are
|
||||
* not distributed. In that case, silently ignore. This is not very
|
||||
|
@ -97,6 +97,8 @@ master_remove_partition_metadata(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
master_remove_distributed_table_metadata_from_workers(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
text *schemaNameText = PG_GETARG_TEXT_P(1);
|
||||
text *tableNameText = PG_GETARG_TEXT_P(2);
|
||||
|
@ -104,8 +106,6 @@ master_remove_distributed_table_metadata_from_workers(PG_FUNCTION_ARGS)
|
|||
char *schemaName = text_to_cstring(schemaNameText);
|
||||
char *tableName = text_to_cstring(tableNameText);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
CheckTableSchemaNameForDrop(relationId, &schemaName, &tableName);
|
||||
|
||||
MasterRemoveDistributedTableMetadataFromWorkers(relationId, schemaName, tableName);
|
||||
|
|
|
@ -734,8 +734,8 @@ HasForeignKeyWithLocalTable(Oid relationId)
|
|||
|
||||
|
||||
/*
|
||||
* GetForeignKeysWithLocalTables returns a list foreign keys for foreign key
|
||||
* relationaships that relation has with local tables.
|
||||
* GetForeignKeysWithLocalTables returns a list of foreign keys for foreign key
|
||||
* relationships that relation has with local tables.
|
||||
*/
|
||||
static List *
|
||||
GetForeignKeysWithLocalTables(Oid relationId)
|
||||
|
@ -753,6 +753,21 @@ GetForeignKeysWithLocalTables(Oid relationId)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetForeignKeysFromLocalTables returns a list of foreign keys where the referencing
|
||||
* relation is a local table.
|
||||
*/
|
||||
List *
|
||||
GetForeignKeysFromLocalTables(Oid relationId)
|
||||
{
|
||||
int referencedFKeysFlag = INCLUDE_REFERENCED_CONSTRAINTS |
|
||||
INCLUDE_LOCAL_TABLES;
|
||||
List *referencingFKeyList = GetForeignKeyOids(relationId, referencedFKeysFlag);
|
||||
|
||||
return referencingFKeyList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HasForeignKeyToCitusLocalTable returns true if any of the foreign key constraints
|
||||
* on the relation with relationId references to a citus local table.
|
||||
|
@ -1102,6 +1117,30 @@ GetReferencedTableId(Oid foreignKeyId)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetReferencingTableId returns OID of the referencing relation for the foreign
|
||||
* key with foreignKeyId. If there is no such foreign key, then this function
|
||||
* returns InvalidOid.
|
||||
*/
|
||||
Oid
|
||||
GetReferencingTableId(Oid foreignKeyId)
|
||||
{
|
||||
HeapTuple heapTuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(foreignKeyId));
|
||||
if (!HeapTupleIsValid(heapTuple))
|
||||
{
|
||||
/* no such foreign key */
|
||||
return InvalidOid;
|
||||
}
|
||||
|
||||
Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(heapTuple);
|
||||
Oid referencingTableId = constraintForm->conrelid;
|
||||
|
||||
ReleaseSysCache(heapTuple);
|
||||
|
||||
return referencingTableId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsTableTypeIncluded returns true if type of the table with relationId (distributed,
|
||||
* reference, Citus local or Postgres local) is included in the flags, false if not
|
||||
|
|
|
@ -461,15 +461,6 @@ GetFunctionColocationId(Oid functionOid, char *colocateWithTableName,
|
|||
EnsureFunctionCanBeColocatedWithTable(functionOid, distributionArgumentOid,
|
||||
colocatedTableId);
|
||||
}
|
||||
else if (ReplicationModel == REPLICATION_MODEL_COORDINATOR)
|
||||
{
|
||||
/* streaming replication model is required for metadata syncing */
|
||||
ereport(ERROR, (errmsg("cannot create a function with a distribution "
|
||||
"argument when citus.replication_model is "
|
||||
"'statement'"),
|
||||
errhint("Set citus.replication_model to 'streaming' "
|
||||
"before creating distributed tables")));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -537,7 +528,7 @@ EnsureFunctionCanBeColocatedWithTable(Oid functionOid, Oid distributionColumnTyp
|
|||
"with distributed tables that are created using "
|
||||
"streaming replication model."),
|
||||
errhint("When distributing tables make sure that "
|
||||
"citus.replication_model = 'streaming'")));
|
||||
"citus.shard_replication_factor = 1")));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1438,7 +1429,7 @@ PreprocessAlterFunctionSchemaStmt(Node *node, const char *queryString,
|
|||
|
||||
|
||||
/*
|
||||
* PreprocessAlterTypeOwnerStmt is called for change of owner ship of functions before the owner
|
||||
* PreprocessAlterFunctionOwnerStmt is called for change of ownership of functions before the
|
||||
* ownership is changed on the local instance.
|
||||
*
|
||||
* If the function for which the owner is changed is distributed we execute the change on
|
||||
|
|
|
@ -302,7 +302,8 @@ CreateIndexStmtGetSchemaId(IndexStmt *createIndexStatement)
|
|||
* It returns a list that is filled by the pgIndexProcessor.
|
||||
*/
|
||||
List *
|
||||
ExecuteFunctionOnEachTableIndex(Oid relationId, PGIndexProcessor pgIndexProcessor)
|
||||
ExecuteFunctionOnEachTableIndex(Oid relationId, PGIndexProcessor pgIndexProcessor,
|
||||
int indexFlags)
|
||||
{
|
||||
List *result = NIL;
|
||||
ScanKeyData scanKey[1];
|
||||
|
@ -324,7 +325,7 @@ ExecuteFunctionOnEachTableIndex(Oid relationId, PGIndexProcessor pgIndexProcesso
|
|||
while (HeapTupleIsValid(heapTuple))
|
||||
{
|
||||
Form_pg_index indexForm = (Form_pg_index) GETSTRUCT(heapTuple);
|
||||
pgIndexProcessor(indexForm, &result);
|
||||
pgIndexProcessor(indexForm, &result, indexFlags);
|
||||
|
||||
heapTuple = systable_getnext(scanDescriptor);
|
||||
}
|
||||
|
|
|
@ -2228,7 +2228,7 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation,
|
|||
if (cacheEntry->replicationModel == REPLICATION_MODEL_2PC ||
|
||||
MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
|
||||
{
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
Use2PCForCoordinatedTransaction();
|
||||
}
|
||||
|
||||
/* define how tuples will be serialised */
|
||||
|
|
|
@ -14,14 +14,20 @@
|
|||
#include "catalog/dependency.h"
|
||||
#include "catalog/namespace.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "commands/extension.h"
|
||||
#include "distributed/commands.h"
|
||||
#include "distributed/commands/sequence.h"
|
||||
#include "distributed/commands/utility_hook.h"
|
||||
#include "distributed/deparser.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/metadata/distobject.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/metadata_sync.h"
|
||||
#include "nodes/parsenodes.h"
|
||||
|
||||
/* Local functions forward declarations for helper functions */
|
||||
static bool OptionsSpecifyOwnedBy(List *optionList, Oid *ownedByTableId);
|
||||
static bool ShouldPropagateAlterSequence(const ObjectAddress *address);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -92,15 +98,6 @@ ErrorIfDistributedAlterSeqOwnedBy(AlterSeqStmt *alterSeqStmt)
|
|||
errmsg("cannot alter OWNED BY option of a sequence "
|
||||
"already owned by a distributed table")));
|
||||
}
|
||||
else if (!hasDistributedOwner && IsCitusTable(newOwnedByTableId))
|
||||
{
|
||||
/* and don't let local sequences get a distributed OWNED BY */
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot associate an existing sequence with a "
|
||||
"distributed table"),
|
||||
errhint("Use a sequence in a distributed table by specifying "
|
||||
"a serial column type before creating any shards.")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -209,3 +206,305 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList,
|
|||
|
||||
relation_close(relation, NoLock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PreprocessDropSequenceStmt gets called during the planning phase of a DROP SEQUENCE statement
|
||||
* and returns a list of DDLJob's that will drop any distributed sequences from the
|
||||
* workers.
|
||||
*
|
||||
* The DropStmt could have multiple objects to drop, the list of objects will be filtered
|
||||
* to only keep the distributed sequences for deletion on the workers. Non-distributed
|
||||
* sequences will still be dropped locally but not on the workers.
|
||||
*/
|
||||
List *
|
||||
PreprocessDropSequenceStmt(Node *node, const char *queryString,
|
||||
ProcessUtilityContext processUtilityContext)
|
||||
{
|
||||
DropStmt *stmt = castNode(DropStmt, node);
|
||||
List *deletingSequencesList = stmt->objects;
|
||||
List *distributedSequencesList = NIL;
|
||||
List *distributedSequenceAddresses = NIL;
|
||||
|
||||
Assert(stmt->removeType == OBJECT_SEQUENCE);
|
||||
|
||||
if (creating_extension)
|
||||
{
|
||||
/*
|
||||
* extensions should be created separately on the workers, sequences cascading
|
||||
* from an extension should therefor not be propagated here.
|
||||
*/
|
||||
return NIL;
|
||||
}
|
||||
|
||||
if (!EnableDependencyCreation)
|
||||
{
|
||||
/*
|
||||
* we are configured to disable object propagation, should not propagate anything
|
||||
*/
|
||||
return NIL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Our statements need to be fully qualified so we can drop them from the right schema
|
||||
* on the workers
|
||||
*/
|
||||
QualifyTreeNode((Node *) stmt);
|
||||
|
||||
/*
|
||||
* iterate over all sequences to be dropped and filter to keep only distributed
|
||||
* sequences.
|
||||
*/
|
||||
List *objectNameList = NULL;
|
||||
foreach_ptr(objectNameList, deletingSequencesList)
|
||||
{
|
||||
RangeVar *seq = makeRangeVarFromNameList(objectNameList);
|
||||
|
||||
Oid seqOid = RangeVarGetRelid(seq, NoLock, stmt->missing_ok);
|
||||
|
||||
ObjectAddress sequenceAddress = { 0 };
|
||||
ObjectAddressSet(sequenceAddress, RelationRelationId, seqOid);
|
||||
|
||||
if (!IsObjectDistributed(&sequenceAddress))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/* collect information for all distributed sequences */
|
||||
ObjectAddress *addressp = palloc(sizeof(ObjectAddress));
|
||||
*addressp = sequenceAddress;
|
||||
distributedSequenceAddresses = lappend(distributedSequenceAddresses, addressp);
|
||||
distributedSequencesList = lappend(distributedSequencesList, objectNameList);
|
||||
}
|
||||
|
||||
if (list_length(distributedSequencesList) <= 0)
|
||||
{
|
||||
/* no distributed functions to drop */
|
||||
return NIL;
|
||||
}
|
||||
|
||||
/*
|
||||
* managing types can only be done on the coordinator if ddl propagation is on. when
|
||||
* it is off we will never get here. MX workers don't have a notion of distributed
|
||||
* types, so we block the call.
|
||||
*/
|
||||
EnsureCoordinator();
|
||||
|
||||
/* remove the entries for the distributed objects on dropping */
|
||||
ObjectAddress *address = NULL;
|
||||
foreach_ptr(address, distributedSequenceAddresses)
|
||||
{
|
||||
UnmarkObjectDistributed(address);
|
||||
}
|
||||
|
||||
/*
|
||||
* Swap the list of objects before deparsing and restore the old list after. This
|
||||
* ensures we only have distributed sequences in the deparsed drop statement.
|
||||
*/
|
||||
DropStmt *stmtCopy = copyObject(stmt);
|
||||
stmtCopy->objects = distributedSequencesList;
|
||||
stmtCopy->missing_ok = true;
|
||||
const char *dropStmtSql = DeparseTreeNode((Node *) stmtCopy);
|
||||
|
||||
List *commands = list_make3(DISABLE_DDL_PROPAGATION,
|
||||
(void *) dropStmtSql,
|
||||
ENABLE_DDL_PROPAGATION);
|
||||
|
||||
return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PreprocessRenameSequenceStmt is called when the user is renaming a sequence. The invocation
|
||||
* happens before the statement is applied locally.
|
||||
*
|
||||
* As the sequence already exists we have access to the ObjectAddress, this is used to
|
||||
* check if it is distributed. If so the rename is executed on all the workers to keep the
|
||||
* types in sync across the cluster.
|
||||
*/
|
||||
List *
|
||||
PreprocessRenameSequenceStmt(Node *node, const char *queryString, ProcessUtilityContext
|
||||
processUtilityContext)
|
||||
{
|
||||
RenameStmt *stmt = castNode(RenameStmt, node);
|
||||
Assert(stmt->renameType == OBJECT_SEQUENCE);
|
||||
|
||||
ObjectAddress address = GetObjectAddressFromParseTree((Node *) stmt,
|
||||
stmt->missing_ok);
|
||||
|
||||
if (!ShouldPropagateAlterSequence(&address))
|
||||
{
|
||||
return NIL;
|
||||
}
|
||||
|
||||
EnsureCoordinator();
|
||||
QualifyTreeNode((Node *) stmt);
|
||||
|
||||
/* this takes care of cases where not all workers have synced metadata */
|
||||
RenameStmt *stmtCopy = copyObject(stmt);
|
||||
stmtCopy->missing_ok = true;
|
||||
|
||||
const char *sql = DeparseTreeNode((Node *) stmtCopy);
|
||||
|
||||
List *commands = list_make3(DISABLE_DDL_PROPAGATION, (void *) sql,
|
||||
ENABLE_DDL_PROPAGATION);
|
||||
|
||||
return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RenameSequenceStmtObjectAddress returns the ObjectAddress of the sequence that is the
|
||||
* subject of the RenameStmt.
|
||||
*/
|
||||
ObjectAddress
|
||||
RenameSequenceStmtObjectAddress(Node *node, bool missing_ok)
|
||||
{
|
||||
RenameStmt *stmt = castNode(RenameStmt, node);
|
||||
Assert(stmt->renameType == OBJECT_SEQUENCE);
|
||||
|
||||
RangeVar *sequence = stmt->relation;
|
||||
Oid seqOid = RangeVarGetRelid(sequence, NoLock, missing_ok);
|
||||
ObjectAddress sequenceAddress = { 0 };
|
||||
ObjectAddressSet(sequenceAddress, RelationRelationId, seqOid);
|
||||
|
||||
return sequenceAddress;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ShouldPropagateAlterSequence returns, based on the address of a sequence, if alter
|
||||
* statements targeting the function should be propagated.
|
||||
*/
|
||||
static bool
|
||||
ShouldPropagateAlterSequence(const ObjectAddress *address)
|
||||
{
|
||||
if (creating_extension)
|
||||
{
|
||||
/*
|
||||
* extensions should be created separately on the workers, sequences cascading
|
||||
* from an extension should therefore not be propagated.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!EnableDependencyCreation)
|
||||
{
|
||||
/*
|
||||
* we are configured to disable object propagation, should not propagate anything
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!IsObjectDistributed(address))
|
||||
{
|
||||
/* do not propagate alter sequence for non-distributed sequences */
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PreprocessAlterSequenceStmt gets called during the planning phase of an ALTER SEQUENCE statement
|
||||
* of one of the following forms:
|
||||
* ALTER SEQUENCE [ IF EXISTS ] name
|
||||
* [ AS data_type ]
|
||||
* [ INCREMENT [ BY ] increment ]
|
||||
* [ MINVALUE minvalue | NO MINVALUE ] [ MAXVALUE maxvalue | NO MAXVALUE ]
|
||||
* [ START [ WITH ] start ]
|
||||
* [ RESTART [ [ WITH ] restart ] ]
|
||||
* [ CACHE cache ] [ [ NO ] CYCLE ]
|
||||
* [ OWNED BY { table_name.column_name | NONE } ]
|
||||
*
|
||||
* For distributed sequences, this operation will not be allowed for now.
|
||||
* The reason is that we change sequence parameters when distributing it, so we don't want to
|
||||
* touch those parameters for now.
|
||||
*/
|
||||
List *
|
||||
PreprocessAlterSequenceStmt(Node *node, const char *queryString,
|
||||
ProcessUtilityContext processUtilityContext)
|
||||
{
|
||||
AlterSeqStmt *stmt = castNode(AlterSeqStmt, node);
|
||||
|
||||
ObjectAddress address = GetObjectAddressFromParseTree((Node *) stmt,
|
||||
stmt->missing_ok);
|
||||
|
||||
/* error out if the sequence is distributed */
|
||||
if (IsObjectDistributed(&address))
|
||||
{
|
||||
ereport(ERROR, (errmsg(
|
||||
"This operation is currently not allowed for a distributed sequence.")));
|
||||
}
|
||||
else
|
||||
{
|
||||
return NIL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AlterSequenceOwnerObjectAddress returns the ObjectAddress of the sequence that is the
|
||||
* subject of the AlterOwnerStmt.
|
||||
*/
|
||||
ObjectAddress
|
||||
AlterSequenceObjectAddress(Node *node, bool missing_ok)
|
||||
{
|
||||
AlterSeqStmt *stmt = castNode(AlterSeqStmt, node);
|
||||
|
||||
RangeVar *sequence = stmt->sequence;
|
||||
Oid seqOid = RangeVarGetRelid(sequence, NoLock, stmt->missing_ok);
|
||||
ObjectAddress sequenceAddress = { 0 };
|
||||
ObjectAddressSet(sequenceAddress, RelationRelationId, seqOid);
|
||||
|
||||
return sequenceAddress;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PreprocessAlterSequenceSchemaStmt is executed before the statement is applied to the local
|
||||
* postgres instance.
|
||||
*
|
||||
* For distributed sequences, this operation will not be allowed for now.
|
||||
*/
|
||||
List *
|
||||
PreprocessAlterSequenceSchemaStmt(Node *node, const char *queryString,
|
||||
ProcessUtilityContext processUtilityContext)
|
||||
{
|
||||
AlterObjectSchemaStmt *stmt = castNode(AlterObjectSchemaStmt, node);
|
||||
Assert(stmt->objectType == OBJECT_SEQUENCE);
|
||||
|
||||
ObjectAddress address = GetObjectAddressFromParseTree((Node *) stmt,
|
||||
stmt->missing_ok);
|
||||
|
||||
/* error out if the sequence is distributed */
|
||||
if (IsObjectDistributed(&address))
|
||||
{
|
||||
ereport(ERROR, (errmsg(
|
||||
"This operation is currently not allowed for a distributed sequence.")));
|
||||
}
|
||||
else
|
||||
{
|
||||
return NIL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AlterSequenceSchemaStmtObjectAddress returns the ObjectAddress of the sequence that is
|
||||
* the subject of the AlterObjectSchemaStmt.
|
||||
*/
|
||||
ObjectAddress
|
||||
AlterSequenceSchemaStmtObjectAddress(Node *node, bool missing_ok)
|
||||
{
|
||||
AlterObjectSchemaStmt *stmt = castNode(AlterObjectSchemaStmt, node);
|
||||
Assert(stmt->objectType == OBJECT_SEQUENCE);
|
||||
|
||||
RangeVar *sequence = stmt->relation;
|
||||
Oid seqOid = RangeVarGetRelid(sequence, NoLock, missing_ok);
|
||||
ObjectAddress sequenceAddress = { 0 };
|
||||
ObjectAddressSet(sequenceAddress, RelationRelationId, seqOid);
|
||||
|
||||
return sequenceAddress;
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "catalog/pg_class.h"
|
||||
#include "catalog/pg_constraint.h"
|
||||
#include "catalog/pg_depend.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "commands/tablecmds.h"
|
||||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/colocation_utils.h"
|
||||
|
@ -28,14 +29,17 @@
|
|||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/metadata_sync.h"
|
||||
#include "distributed/metadata/dependency.h"
|
||||
#include "distributed/metadata/distobject.h"
|
||||
#include "distributed/multi_executor.h"
|
||||
#include "distributed/multi_partitioning_utils.h"
|
||||
#include "distributed/reference_table_utils.h"
|
||||
#include "distributed/relation_access_tracking.h"
|
||||
#include "distributed/resource_lock.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "distributed/worker_shard_visibility.h"
|
||||
#include "lib/stringinfo.h"
|
||||
#include "nodes/parsenodes.h"
|
||||
#include "parser/parse_expr.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
@ -122,6 +126,8 @@ PreprocessDropTableStmt(Node *node, const char *queryString,
|
|||
|
||||
Oid relationId = RangeVarGetRelid(tableRangeVar, AccessShareLock, missingOK);
|
||||
|
||||
ErrorIfIllegallyChangingKnownShard(relationId);
|
||||
|
||||
/* we're not interested in non-valid, non-distributed relations */
|
||||
if (relationId == InvalidOid || !IsCitusTable(relationId))
|
||||
{
|
||||
|
@ -165,6 +171,8 @@ PreprocessDropTableStmt(Node *node, const char *queryString,
|
|||
|
||||
SendCommandToWorkersWithMetadata(detachPartitionCommand);
|
||||
}
|
||||
|
||||
SendCommandToWorkersWithMetadata(ENABLE_DDL_PROPAGATION);
|
||||
}
|
||||
|
||||
return NIL;
|
||||
|
@ -574,24 +582,50 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
|
|||
ErrorIfUnsupportedAlterTableStmt(alterTableStatement);
|
||||
}
|
||||
|
||||
EnsureCoordinator();
|
||||
|
||||
/* these will be set in below loop according to subcommands */
|
||||
Oid rightRelationId = InvalidOid;
|
||||
bool executeSequentially = false;
|
||||
|
||||
/*
|
||||
* We check if there is a ADD/DROP FOREIGN CONSTRAINT command in sub commands
|
||||
* We check if there is:
|
||||
* - an ADD/DROP FOREIGN CONSTRAINT command in sub commands
|
||||
* list. If there is we assign referenced relation id to rightRelationId and
|
||||
* we also set skip_validation to true to prevent PostgreSQL to verify validity
|
||||
* of the foreign constraint in master. Validity will be checked in workers
|
||||
* anyway.
|
||||
* - an ADD COLUMN .. DEFAULT nextval('..') OR
|
||||
* an ADD COLUMN .. SERIAL pseudo-type OR
|
||||
* an ALTER COLUMN .. SET DEFAULT nextval('..'). If there is we set
|
||||
* deparseAT variable to true which means we will deparse the statement
|
||||
* before we propagate the command to shards. For shards, all the defaults
|
||||
* coming from a user-defined sequence will be replaced by
|
||||
* NOT NULL constraint.
|
||||
*/
|
||||
List *commandList = alterTableStatement->cmds;
|
||||
|
||||
/*
|
||||
* if deparsing is needed, we will use a different version of the original
|
||||
* alterTableStmt
|
||||
*/
|
||||
bool deparseAT = false;
|
||||
bool propagateCommandToWorkers = true;
|
||||
AlterTableStmt *newStmt = copyObject(alterTableStatement);
|
||||
|
||||
AlterTableCmd *newCmd = makeNode(AlterTableCmd);
|
||||
|
||||
AlterTableCmd *command = NULL;
|
||||
foreach_ptr(command, commandList)
|
||||
{
|
||||
AlterTableType alterTableType = command->subtype;
|
||||
|
||||
/*
|
||||
* if deparsing is needed, we will use a different version of the original
|
||||
* AlterTableCmd
|
||||
*/
|
||||
newCmd = copyObject(command);
|
||||
|
||||
if (alterTableType == AT_AddConstraint)
|
||||
{
|
||||
Constraint *constraint = (Constraint *) command->def;
|
||||
|
@ -666,6 +700,96 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We check for ADD COLUMN .. DEFAULT expr
|
||||
* if expr contains nextval('user_defined_seq')
|
||||
* we should deparse the statement
|
||||
*/
|
||||
constraint = NULL;
|
||||
foreach_ptr(constraint, columnConstraints)
|
||||
{
|
||||
if (constraint->contype == CONSTR_DEFAULT)
|
||||
{
|
||||
if (constraint->raw_expr != NULL)
|
||||
{
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
Node *expr = transformExpr(pstate, constraint->raw_expr,
|
||||
EXPR_KIND_COLUMN_DEFAULT);
|
||||
|
||||
if (contain_nextval_expression_walker(expr, NULL))
|
||||
{
|
||||
deparseAT = true;
|
||||
|
||||
/* the new column definition will have no constraint */
|
||||
ColumnDef *newColDef = copyObject(columnDefinition);
|
||||
newColDef->constraints = NULL;
|
||||
|
||||
newCmd->def = (Node *) newColDef;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We check for ADD COLUMN .. SERIAL pseudo-type
|
||||
* if that's the case, we should deparse the statement
|
||||
* The structure of this check is copied from transformColumnDefinition.
|
||||
*/
|
||||
if (columnDefinition->typeName && list_length(
|
||||
columnDefinition->typeName->names) == 1 &&
|
||||
!columnDefinition->typeName->pct_type)
|
||||
{
|
||||
char *typeName = strVal(linitial(columnDefinition->typeName->names));
|
||||
|
||||
if (strcmp(typeName, "smallserial") == 0 ||
|
||||
strcmp(typeName, "serial2") == 0 ||
|
||||
strcmp(typeName, "serial") == 0 ||
|
||||
strcmp(typeName, "serial4") == 0 ||
|
||||
strcmp(typeName, "bigserial") == 0 ||
|
||||
strcmp(typeName, "serial8") == 0)
|
||||
{
|
||||
deparseAT = true;
|
||||
|
||||
ColumnDef *newColDef = copyObject(columnDefinition);
|
||||
newColDef->is_not_null = false;
|
||||
|
||||
if (strcmp(typeName, "smallserial") == 0 ||
|
||||
strcmp(typeName, "serial2") == 0)
|
||||
{
|
||||
newColDef->typeName->names = NIL;
|
||||
newColDef->typeName->typeOid = INT2OID;
|
||||
}
|
||||
else if (strcmp(typeName, "serial") == 0 ||
|
||||
strcmp(typeName, "serial4") == 0)
|
||||
{
|
||||
newColDef->typeName->names = NIL;
|
||||
newColDef->typeName->typeOid = INT4OID;
|
||||
}
|
||||
else if (strcmp(typeName, "bigserial") == 0 ||
|
||||
strcmp(typeName, "serial8") == 0)
|
||||
{
|
||||
newColDef->typeName->names = NIL;
|
||||
newColDef->typeName->typeOid = INT8OID;
|
||||
}
|
||||
newCmd->def = (Node *) newColDef;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We check for ALTER COLUMN .. SET/DROP DEFAULT
|
||||
* we should not propagate anything to shards
|
||||
*/
|
||||
else if (alterTableType == AT_ColumnDefault)
|
||||
{
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
Node *expr = transformExpr(pstate, command->def,
|
||||
EXPR_KIND_COLUMN_DEFAULT);
|
||||
|
||||
if (contain_nextval_expression_walker(expr, NULL))
|
||||
{
|
||||
propagateCommandToWorkers = false;
|
||||
}
|
||||
}
|
||||
else if (alterTableType == AT_AttachPartition)
|
||||
{
|
||||
|
@ -731,12 +855,20 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
|
|||
DDLJob *ddlJob = palloc0(sizeof(DDLJob));
|
||||
ddlJob->targetRelationId = leftRelationId;
|
||||
ddlJob->concurrentIndexCmd = false;
|
||||
|
||||
const char *sqlForTaskList = alterTableCommand;
|
||||
if (deparseAT)
|
||||
{
|
||||
newStmt->cmds = list_make1(newCmd);
|
||||
sqlForTaskList = DeparseTreeNode((Node *) newStmt);
|
||||
}
|
||||
|
||||
ddlJob->commandString = alterTableCommand;
|
||||
|
||||
if (OidIsValid(rightRelationId))
|
||||
{
|
||||
bool referencedIsLocalTable = !IsCitusTable(rightRelationId);
|
||||
if (referencedIsLocalTable)
|
||||
if (referencedIsLocalTable || !propagateCommandToWorkers)
|
||||
{
|
||||
ddlJob->taskList = NIL;
|
||||
}
|
||||
|
@ -744,13 +876,17 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
|
|||
{
|
||||
/* if foreign key related, use specialized task list function ... */
|
||||
ddlJob->taskList = InterShardDDLTaskList(leftRelationId, rightRelationId,
|
||||
alterTableCommand);
|
||||
sqlForTaskList);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ... otherwise use standard DDL task list function */
|
||||
ddlJob->taskList = DDLTaskList(leftRelationId, alterTableCommand);
|
||||
ddlJob->taskList = DDLTaskList(leftRelationId, sqlForTaskList);
|
||||
if (!propagateCommandToWorkers)
|
||||
{
|
||||
ddlJob->taskList = NIL;
|
||||
}
|
||||
}
|
||||
|
||||
List *ddlJobs = list_make1(ddlJob);
|
||||
|
@ -1467,8 +1603,80 @@ PostprocessAlterTableStmt(AlterTableStmt *alterTableStatement)
|
|||
constraint);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We check for ADD COLUMN .. DEFAULT expr
|
||||
* if expr contains nextval('user_defined_seq')
|
||||
* we should make sure that the type of the column that uses
|
||||
* that sequence is supported
|
||||
*/
|
||||
constraint = NULL;
|
||||
foreach_ptr(constraint, columnConstraints)
|
||||
{
|
||||
if (constraint->contype == CONSTR_DEFAULT)
|
||||
{
|
||||
if (constraint->raw_expr != NULL)
|
||||
{
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
Node *expr = transformExpr(pstate, constraint->raw_expr,
|
||||
EXPR_KIND_COLUMN_DEFAULT);
|
||||
|
||||
/*
|
||||
* We should make sure that the type of the column that uses
|
||||
* that sequence is supported
|
||||
*/
|
||||
if (contain_nextval_expression_walker(expr, NULL))
|
||||
{
|
||||
AttrNumber attnum = get_attnum(relationId,
|
||||
columnDefinition->colname);
|
||||
Oid seqTypId = GetAttributeTypeOid(relationId, attnum);
|
||||
EnsureSequenceTypeSupported(relationId, attnum, seqTypId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We check for ALTER COLUMN .. SET DEFAULT nextval('user_defined_seq')
|
||||
* we should make sure that the type of the column that uses
|
||||
* that sequence is supported
|
||||
*/
|
||||
else if (alterTableType == AT_ColumnDefault)
|
||||
{
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
Node *expr = transformExpr(pstate, command->def,
|
||||
EXPR_KIND_COLUMN_DEFAULT);
|
||||
|
||||
if (contain_nextval_expression_walker(expr, NULL))
|
||||
{
|
||||
AttrNumber attnum = get_attnum(relationId, command->name);
|
||||
Oid seqTypId = GetAttributeTypeOid(relationId, attnum);
|
||||
EnsureSequenceTypeSupported(relationId, attnum, seqTypId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* for the new sequences coming with this ALTER TABLE statement */
|
||||
if (ShouldSyncTableMetadata(relationId) && ClusterHasKnownMetadataWorkers())
|
||||
{
|
||||
List *sequenceCommandList = NIL;
|
||||
|
||||
/* commands to create sequences */
|
||||
List *sequenceDDLCommands = SequenceDDLCommandsForTable(relationId);
|
||||
sequenceCommandList = list_concat(sequenceCommandList, sequenceDDLCommands);
|
||||
|
||||
/* prevent recursive propagation */
|
||||
SendCommandToWorkersWithMetadata(DISABLE_DDL_PROPAGATION);
|
||||
|
||||
/* send the commands one by one */
|
||||
const char *sequenceCommand = NULL;
|
||||
foreach_ptr(sequenceCommand, sequenceCommandList)
|
||||
{
|
||||
SendCommandToWorkersWithMetadata(sequenceCommand);
|
||||
}
|
||||
|
||||
SendCommandToWorkersWithMetadata(ENABLE_DDL_PROPAGATION);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1735,10 +1943,101 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
|
|||
strcmp(typeName, "serial4") == 0 ||
|
||||
strcmp(typeName, "bigserial") == 0 ||
|
||||
strcmp(typeName, "serial8") == 0)
|
||||
{
|
||||
/*
|
||||
* We currently don't support adding a serial column for an MX table
|
||||
* TODO: record the dependency in the workers
|
||||
*/
|
||||
if (ShouldSyncTableMetadata(relationId) &&
|
||||
ClusterHasKnownMetadataWorkers())
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot execute ADD COLUMN commands "
|
||||
"involving serial pseudotypes")));
|
||||
errmsg(
|
||||
"cannot execute ADD COLUMN commands involving serial"
|
||||
" pseudotypes when metadata is synchronized to workers")));
|
||||
}
|
||||
|
||||
/*
|
||||
* we only allow adding a serial column if it is the only subcommand
|
||||
* and it has no constraints
|
||||
*/
|
||||
if (commandList->length > 1 || column->constraints)
|
||||
{
|
||||
ereport(ERROR, (errcode(
|
||||
ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"cannot execute ADD COLUMN commands involving "
|
||||
"serial pseudotypes with other subcommands/constraints"),
|
||||
errhint(
|
||||
"You can issue each subcommand separately")));
|
||||
}
|
||||
|
||||
/*
|
||||
* Currently we don't support backfilling the new column with default values
|
||||
* if the table is not empty
|
||||
*/
|
||||
if (!TableEmpty(relationId))
|
||||
{
|
||||
ereport(ERROR, (errcode(
|
||||
ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"Cannot add a column involving serial pseudotypes "
|
||||
"because the table is not empty"),
|
||||
errhint(
|
||||
"You can first call ALTER TABLE .. ADD COLUMN .. smallint/int/bigint\n"
|
||||
"Then set the default by ALTER TABLE .. ALTER COLUMN .. SET DEFAULT nextval('..')")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List *columnConstraints = column->constraints;
|
||||
|
||||
Constraint *constraint = NULL;
|
||||
foreach_ptr(constraint, columnConstraints)
|
||||
{
|
||||
if (constraint->contype == CONSTR_DEFAULT)
|
||||
{
|
||||
if (constraint->raw_expr != NULL)
|
||||
{
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
Node *expr = transformExpr(pstate, constraint->raw_expr,
|
||||
EXPR_KIND_COLUMN_DEFAULT);
|
||||
|
||||
if (contain_nextval_expression_walker(expr, NULL))
|
||||
{
|
||||
/*
|
||||
* we only allow adding a column with non_const default
|
||||
* if its the only subcommand and has no other constraints
|
||||
*/
|
||||
if (commandList->length > 1 ||
|
||||
columnConstraints->length > 1)
|
||||
{
|
||||
ereport(ERROR, (errcode(
|
||||
ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"cannot execute ADD COLUMN .. DEFAULT nextval('..')"
|
||||
" command with other subcommands/constraints"),
|
||||
errhint(
|
||||
"You can issue each subcommand separately")));
|
||||
}
|
||||
|
||||
/*
|
||||
* Currently we don't support backfilling the new column with default values
|
||||
* if the table is not empty
|
||||
*/
|
||||
if (!TableEmpty(relationId))
|
||||
{
|
||||
ereport(ERROR, (errcode(
|
||||
ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"cannot add a column involving DEFAULT nextval('..') "
|
||||
"because the table is not empty"),
|
||||
errhint(
|
||||
"You can first call ALTER TABLE .. ADD COLUMN .. smallint/int/bigint\n"
|
||||
"Then set the default by ALTER TABLE .. ALTER COLUMN .. SET DEFAULT nextval('..')")));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1746,9 +2045,67 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
|
|||
break;
|
||||
}
|
||||
|
||||
case AT_DropColumn:
|
||||
case AT_ColumnDefault:
|
||||
{
|
||||
if (AlterInvolvesPartitionColumn(alterTableStatement, command))
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot execute ALTER TABLE command "
|
||||
"involving partition column")));
|
||||
}
|
||||
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
Node *expr = transformExpr(pstate, command->def,
|
||||
EXPR_KIND_COLUMN_DEFAULT);
|
||||
|
||||
if (contain_nextval_expression_walker(expr, NULL))
|
||||
{
|
||||
/*
|
||||
* we only allow altering a column's default to non_const expr
|
||||
* if its the only subcommand
|
||||
*/
|
||||
if (commandList->length > 1)
|
||||
{
|
||||
ereport(ERROR, (errcode(
|
||||
ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"cannot execute ALTER COLUMN COLUMN .. SET DEFAULT "
|
||||
"nextval('..') command with other subcommands"),
|
||||
errhint(
|
||||
"You can issue each subcommand separately")));
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case AT_AlterColumnType:
|
||||
{
|
||||
if (AlterInvolvesPartitionColumn(alterTableStatement, command))
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot execute ALTER TABLE command "
|
||||
"involving partition column")));
|
||||
}
|
||||
|
||||
/*
|
||||
* We check for ALTER COLUMN TYPE ...
|
||||
* if the column has default coming from a user-defined sequence
|
||||
* changing the type of the column should not be allowed for now
|
||||
*/
|
||||
AttrNumber attnum = get_attnum(relationId, command->name);
|
||||
List *attnumList = NIL;
|
||||
List *dependentSequenceList = NIL;
|
||||
GetDependentSequencesWithRelation(relationId, &attnumList,
|
||||
&dependentSequenceList, attnum);
|
||||
if (dependentSequenceList != NIL)
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot execute ALTER COLUMN TYPE .. command "
|
||||
"because the column involves a default coming "
|
||||
"from a sequence")));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case AT_DropColumn:
|
||||
case AT_DropNotNull:
|
||||
{
|
||||
if (AlterInvolvesPartitionColumn(alterTableStatement, command))
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include "distributed/resource_lock.h"
|
||||
#include "distributed/transaction_management.h"
|
||||
#include "distributed/worker_transaction.h"
|
||||
#include "distributed/worker_shard_visibility.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
@ -174,10 +175,11 @@ TruncateTaskList(Oid relationId)
|
|||
Datum
|
||||
truncate_local_data_after_distributing_table(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
EnsureLocalTableCanBeTruncated(relationId);
|
||||
|
||||
TruncateStmt *truncateStmt = makeNode(TruncateStmt);
|
||||
|
@ -215,16 +217,12 @@ EnsureLocalTableCanBeTruncated(Oid relationId)
|
|||
"tables.")));
|
||||
}
|
||||
|
||||
/* make sure there are no foreign key references from a local table */
|
||||
SetForeignConstraintRelationshipGraphInvalid();
|
||||
List *referencingRelationList = ReferencingRelationIdList(relationId);
|
||||
|
||||
Oid referencingRelation = InvalidOid;
|
||||
foreach_oid(referencingRelation, referencingRelationList)
|
||||
{
|
||||
/* we do not truncate a table if there is a local table referencing it */
|
||||
if (!IsCitusTable(referencingRelation))
|
||||
List *referencingForeignConstaintsFromLocalTables =
|
||||
GetForeignKeysFromLocalTables(relationId);
|
||||
if (list_length(referencingForeignConstaintsFromLocalTables) > 0)
|
||||
{
|
||||
Oid foreignKeyId = linitial_oid(referencingForeignConstaintsFromLocalTables);
|
||||
Oid referencingRelation = GetReferencingTableId(foreignKeyId);
|
||||
char *referencedRelationName = get_rel_name(relationId);
|
||||
char *referencingRelationName = get_rel_name(referencingRelation);
|
||||
|
||||
|
@ -234,7 +232,6 @@ EnsureLocalTableCanBeTruncated(Oid relationId)
|
|||
referencingRelationName,
|
||||
referencedRelationName)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -265,6 +262,9 @@ ErrorIfUnsupportedTruncateStmt(TruncateStmt *truncateStatement)
|
|||
foreach_ptr(rangeVar, relationList)
|
||||
{
|
||||
Oid relationId = RangeVarGetRelid(rangeVar, NoLock, false);
|
||||
|
||||
ErrorIfIllegallyChangingKnownShard(relationId);
|
||||
|
||||
char relationKind = get_rel_relkind(relationId);
|
||||
if (IsCitusTable(relationId) &&
|
||||
relationKind == RELKIND_FOREIGN_TABLE)
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
/* stores the string representation of our node connection GUC */
|
||||
char *NodeConninfo = "";
|
||||
char *LocalHostName = "localhost";
|
||||
|
||||
/* represents a list of libpq parameter settings */
|
||||
typedef struct ConnParamsInfo
|
||||
|
|
|
@ -111,9 +111,9 @@ PG_FUNCTION_INFO_V1(citus_reserved_connection_stats);
|
|||
Datum
|
||||
citus_reserved_connection_stats(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
|
||||
|
||||
StoreAllReservedConnections(tupleStore, tupleDescriptor);
|
||||
|
|
|
@ -846,6 +846,18 @@ ConnectionModifiedPlacement(MultiConnection *connection)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (dlist_is_empty(&connection->referencedPlacements))
|
||||
{
|
||||
/*
|
||||
* When referencesPlacements are empty, it means that we come here
|
||||
* from an API that uses a node connection (e.g., not placement connection),
|
||||
* which doesn't set placements.
|
||||
* In that case, the command sent could be either write or read, so we assume
|
||||
* it is write to be on the safe side.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
dlist_foreach(placementIter, &connection->referencedPlacements)
|
||||
{
|
||||
ConnectionReference *connectionReference =
|
||||
|
|
|
@ -136,9 +136,9 @@ PG_FUNCTION_INFO_V1(citus_remote_connection_stats);
|
|||
Datum
|
||||
citus_remote_connection_stats(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
|
||||
|
||||
StoreAllRemoteConnectionStats(tupleStore, tupleDescriptor);
|
||||
|
|
|
@ -27,10 +27,12 @@
|
|||
#include "catalog/indexing.h"
|
||||
#include "catalog/namespace.h"
|
||||
#include "catalog/pg_am.h"
|
||||
#include "catalog/pg_attrdef.h"
|
||||
#include "catalog/pg_attribute.h"
|
||||
#include "catalog/pg_authid.h"
|
||||
#include "catalog/pg_class.h"
|
||||
#include "catalog/pg_collation.h"
|
||||
#include "catalog/pg_depend.h"
|
||||
#include "catalog/pg_extension.h"
|
||||
#include "catalog/pg_foreign_data_wrapper.h"
|
||||
#include "catalog/pg_index.h"
|
||||
|
@ -40,9 +42,10 @@
|
|||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/multi_partitioning_utils.h"
|
||||
#include "distributed/relay_utility.h"
|
||||
#include "distributed/metadata_utility.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/metadata_sync.h"
|
||||
#include "distributed/metadata_utility.h"
|
||||
#include "distributed/relay_utility.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "foreign/foreign.h"
|
||||
#include "lib/stringinfo.h"
|
||||
|
@ -74,6 +77,8 @@ static void AppendStorageParametersToString(StringInfo stringBuffer,
|
|||
List *optionList);
|
||||
static void simple_quote_literal(StringInfo buf, const char *val);
|
||||
static char * flatten_reloptions(Oid relid);
|
||||
static Oid get_attrdef_oid(Oid relationId, AttrNumber attnum);
|
||||
|
||||
|
||||
/*
|
||||
* pg_get_extensiondef_string finds the foreign data wrapper that corresponds to
|
||||
|
@ -365,6 +370,16 @@ pg_get_tableschemadef_string(Oid tableRelationId, bool includeSequenceDefaults,
|
|||
appendStringInfo(&buffer, " DEFAULT %s", defaultString);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We should make sure that the type of the column that uses
|
||||
* that sequence is supported
|
||||
*/
|
||||
if (contain_nextval_expression_walker(defaultNode, NULL))
|
||||
{
|
||||
EnsureSequenceTypeSupported(tableRelationId, defaultValue->adnum,
|
||||
attributeForm->atttypid);
|
||||
}
|
||||
}
|
||||
|
||||
/* if this column has a not null constraint, append the constraint */
|
||||
|
@ -483,6 +498,138 @@ pg_get_tableschemadef_string(Oid tableRelationId, bool includeSequenceDefaults,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* EnsureSequenceTypeSupported ensures that the type of the column that uses
|
||||
* a sequence on its DEFAULT is consistent with previous uses of the sequence (if any)
|
||||
* It gets the AttrDefault OID from the given relationId and attnum, extracts the sequence
|
||||
* id from it, and if any other distributed table uses that same sequence, it checks whether
|
||||
* the types of the columns using the sequence match. If they don't, it errors out.
|
||||
* Otherwise, the condition is ensured.
|
||||
*/
|
||||
void
|
||||
EnsureSequenceTypeSupported(Oid relationId, AttrNumber attnum, Oid seqTypId)
|
||||
{
|
||||
/* get attrdefoid from the given relationId and attnum */
|
||||
Oid attrdefOid = get_attrdef_oid(relationId, attnum);
|
||||
|
||||
/* retrieve the sequence id of the sequence found in nextval('seq') */
|
||||
List *sequencesFromAttrDef = GetSequencesFromAttrDef(attrdefOid);
|
||||
|
||||
if (list_length(sequencesFromAttrDef) == 0)
|
||||
{
|
||||
/*
|
||||
* We need this check because sometimes there are cases where the
|
||||
* dependency between the table and the sequence is not formed
|
||||
* One example is when the default is defined by
|
||||
* DEFAULT nextval('seq_name'::text) (not by DEFAULT nextval('seq_name'))
|
||||
* In these cases, sequencesFromAttrDef with be empty.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
if (list_length(sequencesFromAttrDef) > 1)
|
||||
{
|
||||
/* to simplify and eliminate cases like "DEFAULT nextval('..') - nextval('..')" */
|
||||
ereport(ERROR, (errmsg(
|
||||
"More than one sequence in a column default"
|
||||
" is not supported for distribution")));
|
||||
}
|
||||
|
||||
Oid seqOid = lfirst_oid(list_head(sequencesFromAttrDef));
|
||||
|
||||
List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE);
|
||||
Oid citusTableId = InvalidOid;
|
||||
foreach_oid(citusTableId, citusTableIdList)
|
||||
{
|
||||
List *attnumList = NIL;
|
||||
List *dependentSequenceList = NIL;
|
||||
GetDependentSequencesWithRelation(citusTableId, &attnumList,
|
||||
&dependentSequenceList, 0);
|
||||
ListCell *attnumCell = NULL;
|
||||
ListCell *dependentSequenceCell = NULL;
|
||||
forboth(attnumCell, attnumList, dependentSequenceCell,
|
||||
dependentSequenceList)
|
||||
{
|
||||
AttrNumber currentAttnum = lfirst_int(attnumCell);
|
||||
Oid currentSeqOid = lfirst_oid(dependentSequenceCell);
|
||||
|
||||
/*
|
||||
* If another distributed table is using the same sequence
|
||||
* in one of its column defaults, make sure the types of the
|
||||
* columns match
|
||||
*/
|
||||
if (currentSeqOid == seqOid)
|
||||
{
|
||||
Oid currentSeqTypId = GetAttributeTypeOid(citusTableId,
|
||||
currentAttnum);
|
||||
if (seqTypId != currentSeqTypId)
|
||||
{
|
||||
char *sequenceName = generate_qualified_relation_name(
|
||||
seqOid);
|
||||
char *citusTableName =
|
||||
generate_qualified_relation_name(citusTableId);
|
||||
ereport(ERROR, (errmsg(
|
||||
"The sequence %s is already used for a different"
|
||||
" type in column %d of the table %s",
|
||||
sequenceName, currentAttnum,
|
||||
citusTableName)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* get_attrdef_oid gets the oid of the attrdef that has dependency with
|
||||
* the given relationId (refobjid) and attnum (refobjsubid).
|
||||
* If there is no such attrdef it returns InvalidOid.
|
||||
* NOTE: we are iterating pg_depend here since this function is used together
|
||||
* with other functions that iterate pg_depend. Normally, a look at pg_attrdef
|
||||
* would make more sense.
|
||||
*/
|
||||
static Oid
|
||||
get_attrdef_oid(Oid relationId, AttrNumber attnum)
|
||||
{
|
||||
Oid resultAttrdefOid = InvalidOid;
|
||||
|
||||
ScanKeyData key[3];
|
||||
|
||||
Relation depRel = table_open(DependRelationId, AccessShareLock);
|
||||
|
||||
ScanKeyInit(&key[0],
|
||||
Anum_pg_depend_refclassid,
|
||||
BTEqualStrategyNumber, F_OIDEQ,
|
||||
ObjectIdGetDatum(RelationRelationId));
|
||||
ScanKeyInit(&key[1],
|
||||
Anum_pg_depend_refobjid,
|
||||
BTEqualStrategyNumber, F_OIDEQ,
|
||||
ObjectIdGetDatum(relationId));
|
||||
ScanKeyInit(&key[2],
|
||||
Anum_pg_depend_refobjsubid,
|
||||
BTEqualStrategyNumber, F_INT4EQ,
|
||||
Int32GetDatum(attnum));
|
||||
|
||||
SysScanDesc scan = systable_beginscan(depRel, DependReferenceIndexId, true,
|
||||
NULL, attnum ? 3 : 2, key);
|
||||
|
||||
HeapTuple tup;
|
||||
while (HeapTupleIsValid(tup = systable_getnext(scan)))
|
||||
{
|
||||
Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup);
|
||||
|
||||
if (deprec->classid == AttrDefaultRelationId)
|
||||
{
|
||||
resultAttrdefOid = deprec->objid;
|
||||
}
|
||||
}
|
||||
|
||||
systable_endscan(scan);
|
||||
table_close(depRel, AccessShareLock);
|
||||
return resultAttrdefOid;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EnsureRelationKindSupported errors out if the given relation is not supported
|
||||
* as a distributed relation.
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* deparse_database_stmts.c
|
||||
*
|
||||
* All routines to deparse database statements.
|
||||
*
|
||||
* Copyright (c), Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/namespace.h"
|
||||
#include "lib/stringinfo.h"
|
||||
#include "nodes/parsenodes.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/deparser.h"
|
||||
|
||||
static void AppendAlterDatabaseOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt);
|
||||
|
||||
|
||||
char *
|
||||
DeparseAlterDatabaseOwnerStmt(Node *node)
|
||||
{
|
||||
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
Assert(stmt->objectType == OBJECT_DATABASE);
|
||||
|
||||
AppendAlterDatabaseOwnerStmt(&str, stmt);
|
||||
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
AppendAlterDatabaseOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt)
|
||||
{
|
||||
Assert(stmt->objectType == OBJECT_DATABASE);
|
||||
|
||||
appendStringInfo(buf,
|
||||
"ALTER DATABASE %s OWNER TO %s;",
|
||||
quote_identifier(strVal((Value *) stmt->object)),
|
||||
RoleSpecString(stmt->newowner, true));
|
||||
}
|
|
@ -0,0 +1,158 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* deparse_sequence_stmts.c
|
||||
*
|
||||
* All routines to deparse sequence statements.
|
||||
* This file contains all entry points specific for sequence statement
|
||||
* deparsing
|
||||
*
|
||||
* Copyright (c), Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/namespace.h"
|
||||
#include "distributed/deparser.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
|
||||
/* forward declaration for deparse functions */
|
||||
static void AppendDropSequenceStmt(StringInfo buf, DropStmt *stmt);
|
||||
static void AppendSequenceNameList(StringInfo buf, List *objects, ObjectType objtype);
|
||||
static void AppendRenameSequenceStmt(StringInfo buf, RenameStmt *stmt);
|
||||
|
||||
/*
|
||||
* DeparseDropSequenceStmt builds and returns a string representing the DropStmt
|
||||
*/
|
||||
char *
|
||||
DeparseDropSequenceStmt(Node *node)
|
||||
{
|
||||
DropStmt *stmt = castNode(DropStmt, node);
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
Assert(stmt->removeType == OBJECT_SEQUENCE);
|
||||
|
||||
AppendDropSequenceStmt(&str, stmt);
|
||||
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendDropSequenceStmt appends a string representing the DropStmt to a buffer
|
||||
*/
|
||||
static void
|
||||
AppendDropSequenceStmt(StringInfo buf, DropStmt *stmt)
|
||||
{
|
||||
appendStringInfoString(buf, "DROP SEQUENCE ");
|
||||
|
||||
if (stmt->missing_ok)
|
||||
{
|
||||
appendStringInfoString(buf, "IF EXISTS ");
|
||||
}
|
||||
|
||||
AppendSequenceNameList(buf, stmt->objects, stmt->removeType);
|
||||
|
||||
if (stmt->behavior == DROP_CASCADE)
|
||||
{
|
||||
appendStringInfoString(buf, " CASCADE");
|
||||
}
|
||||
|
||||
appendStringInfoString(buf, ";");
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendSequenceNameList appends a string representing the list of sequence names to a buffer
|
||||
*/
|
||||
static void
|
||||
AppendSequenceNameList(StringInfo buf, List *objects, ObjectType objtype)
|
||||
{
|
||||
ListCell *objectCell = NULL;
|
||||
foreach(objectCell, objects)
|
||||
{
|
||||
if (objectCell != list_head(objects))
|
||||
{
|
||||
appendStringInfo(buf, ", ");
|
||||
}
|
||||
|
||||
RangeVar *seq = makeRangeVarFromNameList((List *) lfirst(objectCell));
|
||||
|
||||
if (seq->schemaname == NULL)
|
||||
{
|
||||
Oid schemaOid = RangeVarGetCreationNamespace(seq);
|
||||
seq->schemaname = get_namespace_name(schemaOid);
|
||||
}
|
||||
|
||||
char *qualifiedSequenceName = quote_qualified_identifier(seq->schemaname,
|
||||
seq->relname);
|
||||
appendStringInfoString(buf, qualifiedSequenceName);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeparseRenameSequenceStmt builds and returns a string representing the RenameStmt
|
||||
*/
|
||||
char *
|
||||
DeparseRenameSequenceStmt(Node *node)
|
||||
{
|
||||
RenameStmt *stmt = castNode(RenameStmt, node);
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
Assert(stmt->renameType == OBJECT_SEQUENCE);
|
||||
|
||||
AppendRenameSequenceStmt(&str, stmt);
|
||||
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendRenameSequenceStmt appends a string representing the RenameStmt to a buffer
|
||||
*/
|
||||
static void
|
||||
AppendRenameSequenceStmt(StringInfo buf, RenameStmt *stmt)
|
||||
{
|
||||
RangeVar *seq = stmt->relation;
|
||||
|
||||
char *qualifiedSequenceName = quote_qualified_identifier(seq->schemaname,
|
||||
seq->relname);
|
||||
|
||||
appendStringInfoString(buf, "ALTER SEQUENCE ");
|
||||
|
||||
if (stmt->missing_ok)
|
||||
{
|
||||
appendStringInfoString(buf, "IF EXISTS ");
|
||||
}
|
||||
|
||||
appendStringInfoString(buf, qualifiedSequenceName);
|
||||
|
||||
appendStringInfo(buf, " RENAME TO %s", quote_identifier(stmt->newname));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* QualifyRenameSequenceStmt transforms a
|
||||
* ALTER SEQUENCE .. RENAME TO ..
|
||||
* statement in place and makes the sequence name fully qualified.
|
||||
*/
|
||||
void
|
||||
QualifyRenameSequenceStmt(Node *node)
|
||||
{
|
||||
RenameStmt *stmt = castNode(RenameStmt, node);
|
||||
Assert(stmt->renameType == OBJECT_SEQUENCE);
|
||||
|
||||
RangeVar *seq = stmt->relation;
|
||||
|
||||
if (seq->schemaname == NULL)
|
||||
{
|
||||
Oid schemaOid = RangeVarGetCreationNamespace(seq);
|
||||
seq->schemaname = get_namespace_name(schemaOid);
|
||||
}
|
||||
}
|
|
@ -14,9 +14,13 @@
|
|||
#include "distributed/deparser.h"
|
||||
#include "nodes/nodes.h"
|
||||
#include "nodes/parsenodes.h"
|
||||
#include "parser/parse_type.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
static void AppendAlterTableSchemaStmt(StringInfo buf, AlterObjectSchemaStmt *stmt);
|
||||
static void AppendAlterTableStmt(StringInfo buf, AlterTableStmt *stmt);
|
||||
static void AppendAlterTableCmd(StringInfo buf, AlterTableCmd *alterTableCmd);
|
||||
static void AppendAlterTableCmdAddColumn(StringInfo buf, AlterTableCmd *alterTableCmd);
|
||||
|
||||
char *
|
||||
DeparseAlterTableSchemaStmt(Node *node)
|
||||
|
@ -46,3 +50,129 @@ AppendAlterTableSchemaStmt(StringInfo buf, AlterObjectSchemaStmt *stmt)
|
|||
const char *newSchemaName = quote_identifier(stmt->newschema);
|
||||
appendStringInfo(buf, "%s SET SCHEMA %s;", tableName, newSchemaName);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeparseAlterTableStmt builds and returns a string representing the
|
||||
* AlterTableStmt where the object acted upon is of kind OBJECT_TABLE
|
||||
*/
|
||||
char *
|
||||
DeparseAlterTableStmt(Node *node)
|
||||
{
|
||||
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
Assert(stmt->relkind == OBJECT_TABLE);
|
||||
|
||||
AppendAlterTableStmt(&str, stmt);
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendAlterTableStmt builds and returns an SQL command representing an
|
||||
* ALTER TABLE statement from given AlterTableStmt object where the object
|
||||
* acted upon is of kind OBJECT_TABLE
|
||||
*/
|
||||
static void
|
||||
AppendAlterTableStmt(StringInfo buf, AlterTableStmt *stmt)
|
||||
{
|
||||
const char *identifier = quote_qualified_identifier(stmt->relation->schemaname,
|
||||
stmt->relation->relname);
|
||||
ListCell *cmdCell = NULL;
|
||||
|
||||
Assert(stmt->relkind == OBJECT_TABLE);
|
||||
|
||||
appendStringInfo(buf, "ALTER TABLE %s", identifier);
|
||||
foreach(cmdCell, stmt->cmds)
|
||||
{
|
||||
if (cmdCell != list_head(stmt->cmds))
|
||||
{
|
||||
appendStringInfoString(buf, ", ");
|
||||
}
|
||||
|
||||
AlterTableCmd *alterTableCmd = castNode(AlterTableCmd, lfirst(cmdCell));
|
||||
AppendAlterTableCmd(buf, alterTableCmd);
|
||||
}
|
||||
|
||||
appendStringInfoString(buf, ";");
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendAlterTableCmd builds and appends to the given buffer a command
|
||||
* from given AlterTableCmd object. Currently supported commands are of type
|
||||
* AT_AddColumn and AT_SetNotNull
|
||||
*/
|
||||
static void
|
||||
AppendAlterTableCmd(StringInfo buf, AlterTableCmd *alterTableCmd)
|
||||
{
|
||||
switch (alterTableCmd->subtype)
|
||||
{
|
||||
case AT_AddColumn:
|
||||
{
|
||||
AppendAlterTableCmdAddColumn(buf, alterTableCmd);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
ereport(ERROR, (errmsg("unsupported subtype for alter table command"),
|
||||
errdetail("sub command type: %d", alterTableCmd->subtype)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendAlterTableCmd builds and appends to the given buffer an AT_AddColumn command
|
||||
* from given AlterTableCmd object in the form ADD COLUMN ...
|
||||
*/
|
||||
static void
|
||||
AppendAlterTableCmdAddColumn(StringInfo buf, AlterTableCmd *alterTableCmd)
|
||||
{
|
||||
Assert(alterTableCmd->subtype == AT_AddColumn);
|
||||
|
||||
appendStringInfoString(buf, " ADD COLUMN ");
|
||||
|
||||
ColumnDef *columnDefinition = (ColumnDef *) alterTableCmd->def;
|
||||
|
||||
/*
|
||||
* the way we use the deparser now, constraints are always NULL
|
||||
* adding this check for ColumnDef consistency
|
||||
*/
|
||||
if (columnDefinition->constraints != NULL)
|
||||
{
|
||||
ereport(ERROR, (errmsg("Constraints are not supported for AT_AddColumn")));
|
||||
}
|
||||
|
||||
if (columnDefinition->colname)
|
||||
{
|
||||
appendStringInfo(buf, "%s ", quote_identifier(columnDefinition->colname));
|
||||
}
|
||||
|
||||
int32 typmod = 0;
|
||||
Oid typeOid = InvalidOid;
|
||||
bits16 formatFlags = FORMAT_TYPE_TYPEMOD_GIVEN | FORMAT_TYPE_FORCE_QUALIFY;
|
||||
typenameTypeIdAndMod(NULL, columnDefinition->typeName, &typeOid, &typmod);
|
||||
appendStringInfo(buf, "%s", format_type_extended(typeOid, typmod,
|
||||
formatFlags));
|
||||
if (columnDefinition->is_not_null)
|
||||
{
|
||||
appendStringInfoString(buf, " NOT NULL");
|
||||
}
|
||||
|
||||
/*
|
||||
* the way we use the deparser now, collation is never used
|
||||
* since the data type of columns that use sequences for default
|
||||
* are only int,smallint and bigint (never text, varchar, char)
|
||||
* Adding this part only for ColumnDef consistency
|
||||
*/
|
||||
Oid collationOid = GetColumnDefCollation(NULL, columnDefinition, typeOid);
|
||||
if (OidIsValid(collationOid))
|
||||
{
|
||||
const char *identifier = FormatCollateBEQualified(collationOid);
|
||||
appendStringInfo(buf, " COLLATE %s", identifier);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -124,6 +124,7 @@
|
|||
#include "miscadmin.h"
|
||||
#include "pgstat.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
@ -142,6 +143,7 @@
|
|||
#include "distributed/deparse_shard_query.h"
|
||||
#include "distributed/shared_connection_stats.h"
|
||||
#include "distributed/distributed_execution_locks.h"
|
||||
#include "distributed/intermediate_result_pruning.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/local_executor.h"
|
||||
#include "distributed/multi_client_executor.h"
|
||||
|
@ -413,6 +415,10 @@ typedef struct WorkerPool
|
|||
* use it anymore.
|
||||
*/
|
||||
WorkerPoolFailureState failureState;
|
||||
|
||||
/* execution statistics per pool, in microseconds */
|
||||
uint64 totalTaskExecutionTime;
|
||||
int totalExecutedTasks;
|
||||
} WorkerPool;
|
||||
|
||||
struct TaskPlacementExecution;
|
||||
|
@ -472,6 +478,8 @@ bool EnableBinaryProtocol = false;
|
|||
|
||||
/* GUC, number of ms to wait between opening connections to the same worker */
|
||||
int ExecutorSlowStartInterval = 10;
|
||||
bool EnableCostBasedConnectionEstablishment = true;
|
||||
bool PreventIncompleteConnectionEstablishment = true;
|
||||
|
||||
|
||||
/*
|
||||
|
@ -634,6 +642,12 @@ static WorkerSession * FindOrCreateWorkerSession(WorkerPool *workerPool,
|
|||
static void ManageWorkerPool(WorkerPool *workerPool);
|
||||
static bool ShouldWaitForSlowStart(WorkerPool *workerPool);
|
||||
static int CalculateNewConnectionCount(WorkerPool *workerPool);
|
||||
static bool UsingExistingSessionsCheaperThanEstablishingNewConnections(int
|
||||
readyTaskCount,
|
||||
WorkerPool *
|
||||
workerPool);
|
||||
static double AvgTaskExecutionTimeApproximation(WorkerPool *workerPool);
|
||||
static double AvgConnectionEstablishmentTime(WorkerPool *workerPool);
|
||||
static void OpenNewConnections(WorkerPool *workerPool, int newConnectionCount,
|
||||
TransactionProperties *transactionProperties);
|
||||
static void CheckConnectionTimeout(WorkerPool *workerPool);
|
||||
|
@ -650,6 +664,7 @@ static bool StartPlacementExecutionOnSession(TaskPlacementExecution *placementEx
|
|||
static bool SendNextQuery(TaskPlacementExecution *placementExecution,
|
||||
WorkerSession *session);
|
||||
static void ConnectionStateMachine(WorkerSession *session);
|
||||
static bool HasUnfinishedTaskForSession(WorkerSession *session);
|
||||
static void HandleMultiConnectionSuccess(WorkerSession *session);
|
||||
static bool HasAnyConnectionFailure(WorkerPool *workerPool);
|
||||
static void Activate2PCIfModifyingTransactionExpandsToNewNode(WorkerSession *session);
|
||||
|
@ -675,15 +690,20 @@ static void ExtractParametersForRemoteExecution(ParamListInfo paramListInfo,
|
|||
Oid **parameterTypes,
|
||||
const char ***parameterValues);
|
||||
static int GetEventSetSize(List *sessionList);
|
||||
static bool HasIncompleteConnectionEstablishment(DistributedExecution *execution);
|
||||
static int RebuildWaitEventSet(DistributedExecution *execution);
|
||||
static void ProcessWaitEvents(DistributedExecution *execution, WaitEvent *events, int
|
||||
eventCount, bool *cancellationReceived);
|
||||
static long MillisecondsBetweenTimestamps(instr_time startTime, instr_time endTime);
|
||||
static uint64 MicrosecondsBetweenTimestamps(instr_time startTime, instr_time endTime);
|
||||
static HeapTuple BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values);
|
||||
static AttInMetadata * TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc);
|
||||
static int WorkerPoolCompare(const void *lhsKey, const void *rhsKey);
|
||||
static void SetAttributeInputMetadata(DistributedExecution *execution,
|
||||
ShardCommandExecution *shardCommandExecution);
|
||||
static void LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName,
|
||||
int *nodePort);
|
||||
static bool IsDummyPlacement(ShardPlacement *taskPlacement);
|
||||
|
||||
/*
|
||||
* AdaptiveExecutorPreExecutorRun gets called right before postgres starts its executor
|
||||
|
@ -742,6 +762,12 @@ AdaptiveExecutor(CitusScanState *scanState)
|
|||
/* we should only call this once before the scan finished */
|
||||
Assert(!scanState->finishedRemoteScan);
|
||||
|
||||
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"AdaptiveExecutor",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(localContext);
|
||||
|
||||
|
||||
/* Reset Task fields that are only valid for a single execution */
|
||||
ResetExplainAnalyzeData(taskList);
|
||||
|
||||
|
@ -830,6 +856,8 @@ AdaptiveExecutor(CitusScanState *scanState)
|
|||
SortTupleStore(scanState);
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
|
||||
return resultSlot;
|
||||
}
|
||||
|
||||
|
@ -1227,7 +1255,7 @@ StartDistributedExecution(DistributedExecution *execution)
|
|||
|
||||
if (xactProperties->requires2PC)
|
||||
{
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
Use2PCForCoordinatedTransaction();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1630,8 +1658,10 @@ CleanUpSessions(DistributedExecution *execution)
|
|||
{
|
||||
MultiConnection *connection = session->connection;
|
||||
|
||||
ereport(DEBUG4, (errmsg("Total number of commands sent over the session %ld: %ld",
|
||||
session->sessionId, session->commandsSent)));
|
||||
ereport(DEBUG4, (errmsg("Total number of commands sent over the session %ld: %ld "
|
||||
"to node %s:%d", session->sessionId,
|
||||
session->commandsSent,
|
||||
connection->hostname, connection->port)));
|
||||
|
||||
UnclaimConnection(connection);
|
||||
|
||||
|
@ -1751,8 +1781,10 @@ AssignTasksToConnectionsOrWorkerPool(DistributedExecution *execution)
|
|||
foreach_ptr(taskPlacement, task->taskPlacementList)
|
||||
{
|
||||
int connectionFlags = 0;
|
||||
char *nodeName = taskPlacement->nodeName;
|
||||
int nodePort = taskPlacement->nodePort;
|
||||
char *nodeName = NULL;
|
||||
int nodePort = 0;
|
||||
LookupTaskPlacementHostAndPort(taskPlacement, &nodeName, &nodePort);
|
||||
|
||||
WorkerPool *workerPool = FindOrCreateWorkerPool(execution, nodeName,
|
||||
nodePort);
|
||||
|
||||
|
@ -1900,6 +1932,48 @@ AssignTasksToConnectionsOrWorkerPool(DistributedExecution *execution)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* LookupTaskPlacementHostAndPort sets the nodename and nodeport for the given task placement
|
||||
* with a lookup.
|
||||
*/
|
||||
static void
|
||||
LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName,
|
||||
int *nodePort)
|
||||
{
|
||||
if (IsDummyPlacement(taskPlacement))
|
||||
{
|
||||
/*
|
||||
* If we create a dummy placement for the local node, it is possible
|
||||
* that the entry doesn't exist in pg_dist_node, hence a lookup will fail.
|
||||
* In that case we want to use the dummy placements values.
|
||||
*/
|
||||
*nodeName = taskPlacement->nodeName;
|
||||
*nodePort = taskPlacement->nodePort;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* We want to lookup the node information again since it is possible that
|
||||
* there were changes in pg_dist_node and we will get those invalidations
|
||||
* in LookupNodeForGroup.
|
||||
*/
|
||||
WorkerNode *workerNode = LookupNodeForGroup(taskPlacement->groupId);
|
||||
*nodeName = workerNode->workerName;
|
||||
*nodePort = workerNode->workerPort;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsDummyPlacement returns true if the given placement is a dummy placement.
|
||||
*/
|
||||
static bool
|
||||
IsDummyPlacement(ShardPlacement *taskPlacement)
|
||||
{
|
||||
return taskPlacement->nodeId == LOCAL_NODE_ID;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* WorkerPoolCompare is based on WorkerNodeCompare function. The function
|
||||
* compares two worker nodes by their host name and port number.
|
||||
|
@ -2217,7 +2291,26 @@ RunDistributedExecution(DistributedExecution *execution)
|
|||
/* always (re)build the wait event set the first time */
|
||||
execution->rebuildWaitEventSet = true;
|
||||
|
||||
while (execution->unfinishedTaskCount > 0 && !cancellationReceived)
|
||||
/*
|
||||
* Iterate until all the tasks are finished. Once all the tasks
|
||||
* are finished, ensure that all the connection initializations
|
||||
* are also finished. Otherwise, those connections are terminated
|
||||
* abruptly before they are established (or failed). Instead, we let
|
||||
* the ConnectionStateMachine() to properly handle them.
|
||||
*
|
||||
* Note that we could have the connections that are not established
|
||||
* as a side effect of slow-start algorithm. At the time the algorithm
|
||||
* decides to establish new connections, the execution might have tasks
|
||||
* to finish. But, the execution might finish before the new connections
|
||||
* are established.
|
||||
*
|
||||
* Note that the rules explained above could be overridden by any
|
||||
* cancellation to the query. In that case, we terminate the execution
|
||||
* irrespective of the current status of the tasks or the connections.
|
||||
*/
|
||||
while (!cancellationReceived &&
|
||||
(execution->unfinishedTaskCount > 0 ||
|
||||
HasIncompleteConnectionEstablishment(execution)))
|
||||
{
|
||||
WorkerPool *workerPool = NULL;
|
||||
foreach_ptr(workerPool, execution->workerList)
|
||||
|
@ -2299,6 +2392,33 @@ RunDistributedExecution(DistributedExecution *execution)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* HasIncompleteConnectionEstablishment returns true if any of the connections
|
||||
* that has been initiated by the executor is in initilization stage.
|
||||
*/
|
||||
static bool
|
||||
HasIncompleteConnectionEstablishment(DistributedExecution *execution)
|
||||
{
|
||||
if (!PreventIncompleteConnectionEstablishment)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
WorkerSession *session = NULL;
|
||||
foreach_ptr(session, execution->sessionList)
|
||||
{
|
||||
MultiConnection *connection = session->connection;
|
||||
if (connection->connectionState == MULTI_CONNECTION_INITIAL ||
|
||||
connection->connectionState == MULTI_CONNECTION_CONNECTING)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RebuildWaitEventSet updates the waitEventSet for the distributed execution.
|
||||
* This happens when the connection set for the distributed execution is changed,
|
||||
|
@ -2398,6 +2518,9 @@ ManageWorkerPool(WorkerPool *workerPool)
|
|||
return;
|
||||
}
|
||||
|
||||
/* increase the open rate every cycle (like TCP slow start) */
|
||||
workerPool->maxNewConnectionsPerCycle += 1;
|
||||
|
||||
OpenNewConnections(workerPool, newConnectionCount, execution->transactionProperties);
|
||||
|
||||
/*
|
||||
|
@ -2584,16 +2707,176 @@ CalculateNewConnectionCount(WorkerPool *workerPool)
|
|||
* than the target pool size.
|
||||
*/
|
||||
newConnectionCount = Min(newConnectionsForReadyTasks, maxNewConnectionCount);
|
||||
if (newConnectionCount > 0)
|
||||
if (EnableCostBasedConnectionEstablishment && newConnectionCount > 0 &&
|
||||
initiatedConnectionCount <= MaxCachedConnectionsPerWorker &&
|
||||
UsingExistingSessionsCheaperThanEstablishingNewConnections(
|
||||
readyTaskCount, workerPool))
|
||||
{
|
||||
/* increase the open rate every cycle (like TCP slow start) */
|
||||
workerPool->maxNewConnectionsPerCycle += 1;
|
||||
/*
|
||||
* Before giving the decision, we do one more check. If the cost of
|
||||
* executing the remaining tasks over the existing sessions in the
|
||||
* pool is cheaper than establishing new connections and executing
|
||||
* the tasks over the new connections, we prefer the former.
|
||||
*
|
||||
* For cached connections we should ignore any optimizations as
|
||||
* cached connections are almost free to get. In other words,
|
||||
* as long as there are cached connections that the pool has
|
||||
* not used yet, aggressively use these already established
|
||||
* connections.
|
||||
*
|
||||
* Note that until MaxCachedConnectionsPerWorker has already been
|
||||
* established within the session, we still need to establish
|
||||
* the connections right now.
|
||||
*
|
||||
* Also remember that we are not trying to find the optimal number
|
||||
* of connections for the remaining tasks here. Our goal is to prevent
|
||||
* connection establishments that are absolutely unnecessary. In the
|
||||
* future, we may improve the calculations below to find the optimal
|
||||
* number of new connections required.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return newConnectionCount;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* UsingExistingSessionsCheaperThanEstablishingNewConnections returns true if
|
||||
* using the already established connections takes less time compared to opening
|
||||
* new connections based on the current execution's stats.
|
||||
*
|
||||
* The function returns false if the current execution has not established any connections
|
||||
* or finished any tasks (e.g., no stats to act on).
|
||||
*/
|
||||
static bool
|
||||
UsingExistingSessionsCheaperThanEstablishingNewConnections(int readyTaskCount,
|
||||
WorkerPool *workerPool)
|
||||
{
|
||||
int activeConnectionCount = workerPool->activeConnectionCount;
|
||||
if (workerPool->totalExecutedTasks < 1 || activeConnectionCount < 1)
|
||||
{
|
||||
/*
|
||||
* The pool has not finished any connection establishment or
|
||||
* task yet. So, we refrain from optimizing the execution.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
double avgTaskExecutionTime = AvgTaskExecutionTimeApproximation(workerPool);
|
||||
double avgConnectionEstablishmentTime = AvgConnectionEstablishmentTime(workerPool);
|
||||
|
||||
/* we assume that we are halfway through the execution */
|
||||
double remainingTimeForActiveTaskExecutionsToFinish = avgTaskExecutionTime / 2;
|
||||
|
||||
/*
|
||||
* We use "newConnectionCount" as if it is the task count as
|
||||
* we are only interested in this iteration of CalculateNewConnectionCount().
|
||||
*/
|
||||
double totalTimeToExecuteNewTasks = avgTaskExecutionTime * readyTaskCount;
|
||||
|
||||
double estimatedExecutionTimeForNewTasks =
|
||||
floor(totalTimeToExecuteNewTasks / activeConnectionCount);
|
||||
|
||||
/*
|
||||
* First finish the already running tasks, and then use the connections
|
||||
* to execute the new tasks.
|
||||
*/
|
||||
double costOfExecutingTheTasksOverExistingConnections =
|
||||
remainingTimeForActiveTaskExecutionsToFinish +
|
||||
estimatedExecutionTimeForNewTasks;
|
||||
|
||||
/*
|
||||
* For every task, the executor is supposed to establish one
|
||||
* connection and then execute the task over the connection.
|
||||
*/
|
||||
double costOfExecutingTheTasksOverNewConnection =
|
||||
(avgTaskExecutionTime + avgConnectionEstablishmentTime);
|
||||
|
||||
return (costOfExecutingTheTasksOverExistingConnections <=
|
||||
costOfExecutingTheTasksOverNewConnection);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AvgTaskExecutionTimeApproximation returns the approximation of the average task
|
||||
* execution times on the workerPool.
|
||||
*/
|
||||
static double
|
||||
AvgTaskExecutionTimeApproximation(WorkerPool *workerPool)
|
||||
{
|
||||
uint64 totalTaskExecutionTime = workerPool->totalTaskExecutionTime;
|
||||
int taskCount = workerPool->totalExecutedTasks;
|
||||
|
||||
instr_time now;
|
||||
INSTR_TIME_SET_CURRENT(now);
|
||||
|
||||
WorkerSession *session = NULL;
|
||||
foreach_ptr(session, workerPool->sessionList)
|
||||
{
|
||||
/*
|
||||
* Involve the tasks that are currently running. We do this to
|
||||
* make sure that the execution responds with new connections
|
||||
* quickly if the actively running tasks
|
||||
*/
|
||||
TaskPlacementExecution *placementExecution = session->currentTask;
|
||||
if (placementExecution != NULL &&
|
||||
placementExecution->executionState == PLACEMENT_EXECUTION_RUNNING)
|
||||
{
|
||||
uint64 durationInMicroSecs =
|
||||
MicrosecondsBetweenTimestamps(placementExecution->startTime, now);
|
||||
|
||||
/*
|
||||
* Our approximation is that we assume that the task execution is
|
||||
* just in the halfway through.
|
||||
*/
|
||||
totalTaskExecutionTime += (2 * durationInMicroSecs);
|
||||
taskCount += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return taskCount == 0 ? 0 : ((double) totalTaskExecutionTime / taskCount);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AvgConnectionEstablishmentTime calculates the average connection establishment times
|
||||
* for the input workerPool.
|
||||
*/
|
||||
static double
|
||||
AvgConnectionEstablishmentTime(WorkerPool *workerPool)
|
||||
{
|
||||
double totalTimeMicrosec = 0;
|
||||
int sessionCount = 0;
|
||||
|
||||
WorkerSession *session = NULL;
|
||||
foreach_ptr(session, workerPool->sessionList)
|
||||
{
|
||||
MultiConnection *connection = session->connection;
|
||||
|
||||
/*
|
||||
* There could be MaxCachedConnectionsPerWorker connections that are
|
||||
* already connected. Those connections might skew the average
|
||||
* connection establishment times for the current execution. The reason
|
||||
* is that they are established earlier and the connection establishment
|
||||
* times might be different at the moment those connections are established.
|
||||
*/
|
||||
if (connection->connectionState == MULTI_CONNECTION_CONNECTED)
|
||||
{
|
||||
long connectionEstablishmentTime =
|
||||
MicrosecondsBetweenTimestamps(connection->connectionEstablishmentStart,
|
||||
connection->connectionEstablishmentEnd);
|
||||
|
||||
totalTimeMicrosec += connectionEstablishmentTime;
|
||||
++sessionCount;
|
||||
}
|
||||
}
|
||||
|
||||
return (sessionCount == 0) ? 0 : (totalTimeMicrosec / sessionCount);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* OpenNewConnections opens the given amount of connections for the given workerPool.
|
||||
*/
|
||||
|
@ -2898,6 +3181,18 @@ MillisecondsBetweenTimestamps(instr_time startTime, instr_time endTime)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* MicrosecondsBetweenTimestamps is a helper to get the number of microseconds
|
||||
* between timestamps.
|
||||
*/
|
||||
static uint64
|
||||
MicrosecondsBetweenTimestamps(instr_time startTime, instr_time endTime)
|
||||
{
|
||||
INSTR_TIME_SUBTRACT(endTime, startTime);
|
||||
return INSTR_TIME_GET_MICROSEC(endTime);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ConnectionStateMachine opens a connection and descends into the transaction
|
||||
* state machine when ready.
|
||||
|
@ -2995,8 +3290,32 @@ ConnectionStateMachine(WorkerSession *session)
|
|||
|
||||
case MULTI_CONNECTION_CONNECTED:
|
||||
{
|
||||
/* connection is ready, run the transaction state machine */
|
||||
if (HasUnfinishedTaskForSession(session))
|
||||
{
|
||||
/*
|
||||
* Connection is ready, and we have unfinished tasks.
|
||||
* So, run the transaction state machine.
|
||||
*/
|
||||
TransactionStateMachine(session);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Connection is ready, but we don't have any unfinished
|
||||
* tasks that this session can execute.
|
||||
*
|
||||
* Note that we can be in a situation where the executor
|
||||
* decides to establish a connection, but not need to
|
||||
* use it at the time the connection is established. This could
|
||||
* happen when the earlier connections manage to finish all the
|
||||
* tasks after this connection
|
||||
*
|
||||
* As no tasks are ready to be executed at the moment, we
|
||||
* mark the socket readable to get any notices if exists.
|
||||
*/
|
||||
UpdateConnectionWaitFlags(session, WL_SOCKET_READABLE);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3111,6 +3430,41 @@ ConnectionStateMachine(WorkerSession *session)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* HasUnfinishedTaskForSession gets a session and returns true if there
|
||||
* are any tasks that this session can execute.
|
||||
*/
|
||||
static bool
|
||||
HasUnfinishedTaskForSession(WorkerSession *session)
|
||||
{
|
||||
if (session->currentTask != NULL)
|
||||
{
|
||||
/* the session is executing a command right now */
|
||||
return true;
|
||||
}
|
||||
|
||||
dlist_head *sessionReadyTaskQueue = &(session->readyTaskQueue);
|
||||
if (!dlist_is_empty(sessionReadyTaskQueue))
|
||||
{
|
||||
/* session has an assigned task, which is ready for execution */
|
||||
return true;
|
||||
}
|
||||
|
||||
WorkerPool *workerPool = session->workerPool;
|
||||
dlist_head *poolReadyTaskQueue = &(workerPool->readyTaskQueue);
|
||||
if (!dlist_is_empty(poolReadyTaskQueue))
|
||||
{
|
||||
/*
|
||||
* Pool has unassigned tasks that can be executed
|
||||
* by the input session.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HandleMultiConnectionSuccess logs the established connection and updates
|
||||
* connection's state.
|
||||
|
@ -3124,10 +3478,10 @@ HandleMultiConnectionSuccess(WorkerSession *session)
|
|||
MarkConnectionConnected(connection);
|
||||
|
||||
ereport(DEBUG4, (errmsg("established connection to %s:%d for "
|
||||
"session %ld in %ld msecs",
|
||||
"session %ld in %ld microseconds",
|
||||
connection->hostname, connection->port,
|
||||
session->sessionId,
|
||||
MillisecondsBetweenTimestamps(
|
||||
MicrosecondsBetweenTimestamps(
|
||||
connection->connectionEstablishmentStart,
|
||||
connection->connectionEstablishmentEnd))));
|
||||
|
||||
|
@ -3188,7 +3542,7 @@ Activate2PCIfModifyingTransactionExpandsToNewNode(WorkerSession *session)
|
|||
* just opened, which means we're now going to make modifications
|
||||
* over multiple connections. Activate 2PC!
|
||||
*/
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
Use2PCForCoordinatedTransaction();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4270,19 +4624,20 @@ PlacementExecutionDone(TaskPlacementExecution *placementExecution, bool succeede
|
|||
|
||||
Assert(INSTR_TIME_IS_ZERO(placementExecution->endTime));
|
||||
INSTR_TIME_SET_CURRENT(placementExecution->endTime);
|
||||
uint64 durationMicrosecs =
|
||||
MicrosecondsBetweenTimestamps(placementExecution->startTime,
|
||||
placementExecution->endTime);
|
||||
workerPool->totalTaskExecutionTime += durationMicrosecs;
|
||||
workerPool->totalExecutedTasks += 1;
|
||||
|
||||
if (IsLoggableLevel(DEBUG4))
|
||||
{
|
||||
long durationMillisecs =
|
||||
MillisecondsBetweenTimestamps(placementExecution->startTime,
|
||||
placementExecution->endTime);
|
||||
|
||||
ereport(DEBUG4, (errmsg("task execution (%d) for placement (%ld) on anchor "
|
||||
"shard (%ld) finished in %ld msecs on worker "
|
||||
"shard (%ld) finished in %ld microseconds on worker "
|
||||
"node %s:%d", shardCommandExecution->task->taskId,
|
||||
placementExecution->shardPlacement->placementId,
|
||||
shardCommandExecution->task->anchorShardId,
|
||||
durationMillisecs, workerPool->nodeName,
|
||||
durationMicrosecs, workerPool->nodeName,
|
||||
workerPool->nodePort)));
|
||||
}
|
||||
}
|
||||
|
@ -4457,8 +4812,6 @@ ScheduleNextPlacementExecution(TaskPlacementExecution *placementExecution, bool
|
|||
executionOrder == EXECUTION_ORDER_SEQUENTIAL)
|
||||
{
|
||||
TaskPlacementExecution *nextPlacementExecution = NULL;
|
||||
int placementExecutionCount PG_USED_FOR_ASSERTS_ONLY =
|
||||
shardCommandExecution->placementExecutionCount;
|
||||
|
||||
/* find a placement execution that is not yet marked as failed */
|
||||
do {
|
||||
|
@ -4469,6 +4822,7 @@ ScheduleNextPlacementExecution(TaskPlacementExecution *placementExecution, bool
|
|||
* If all tasks failed then we should already have errored out.
|
||||
* Still, be defensive and throw error instead of crashes.
|
||||
*/
|
||||
int placementExecutionCount = shardCommandExecution->placementExecutionCount;
|
||||
if (nextPlacementExecutionIndex >= placementExecutionCount)
|
||||
{
|
||||
WorkerPool *workerPool = placementExecution->workerPool;
|
||||
|
|
|
@ -189,6 +189,12 @@ CitusBeginScan(CustomScanState *node, EState *estate, int eflags)
|
|||
{
|
||||
CitusBeginModifyScan(node, estate, eflags);
|
||||
}
|
||||
|
||||
/*
|
||||
* In case of a prepared statement, we will see this distributed plan again
|
||||
* on the next execution with a higher usage counter.
|
||||
*/
|
||||
distributedPlan->numberOfTimesExecuted++;
|
||||
}
|
||||
|
||||
|
||||
|
@ -315,6 +321,11 @@ CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags)
|
|||
PlanState *planState = &(scanState->customScanState.ss.ps);
|
||||
DistributedPlan *originalDistributedPlan = scanState->distributedPlan;
|
||||
|
||||
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"CitusBeginModifyScan",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(localContext);
|
||||
|
||||
DistributedPlan *currentPlan =
|
||||
CopyDistributedPlanWithoutCache(originalDistributedPlan);
|
||||
scanState->distributedPlan = currentPlan;
|
||||
|
@ -405,6 +416,8 @@ CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags)
|
|||
*/
|
||||
CacheLocalPlanForShardQuery(task, originalDistributedPlan);
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -118,6 +118,8 @@ PG_FUNCTION_INFO_V1(fetch_intermediate_results);
|
|||
Datum
|
||||
broadcast_intermediate_result(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *resultIdText = PG_GETARG_TEXT_P(0);
|
||||
char *resultIdString = text_to_cstring(resultIdText);
|
||||
text *queryText = PG_GETARG_TEXT_P(1);
|
||||
|
@ -125,8 +127,6 @@ broadcast_intermediate_result(PG_FUNCTION_ARGS)
|
|||
bool writeLocalFile = false;
|
||||
ParamListInfo paramListInfo = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/*
|
||||
* Make sure that this transaction has a distributed transaction ID.
|
||||
*
|
||||
|
@ -159,6 +159,8 @@ broadcast_intermediate_result(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
create_intermediate_result(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *resultIdText = PG_GETARG_TEXT_P(0);
|
||||
char *resultIdString = text_to_cstring(resultIdText);
|
||||
text *queryText = PG_GETARG_TEXT_P(1);
|
||||
|
@ -167,8 +169,6 @@ create_intermediate_result(PG_FUNCTION_ARGS)
|
|||
bool writeLocalFile = true;
|
||||
ParamListInfo paramListInfo = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/*
|
||||
* Make sure that this transaction has a distributed transaction ID.
|
||||
*
|
||||
|
@ -771,13 +771,13 @@ IntermediateResultSize(const char *resultId)
|
|||
Datum
|
||||
read_intermediate_result(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Datum resultId = PG_GETARG_DATUM(0);
|
||||
Datum copyFormatOidDatum = PG_GETARG_DATUM(1);
|
||||
Datum copyFormatLabelDatum = DirectFunctionCall1(enum_out, copyFormatOidDatum);
|
||||
char *copyFormatLabel = DatumGetCString(copyFormatLabelDatum);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
ReadIntermediateResultsIntoFuncOutput(fcinfo, copyFormatLabel, &resultId, 1);
|
||||
|
||||
PG_RETURN_DATUM(0);
|
||||
|
@ -794,14 +794,14 @@ read_intermediate_result(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
read_intermediate_result_array(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
ArrayType *resultIdObject = PG_GETARG_ARRAYTYPE_P(0);
|
||||
Datum copyFormatOidDatum = PG_GETARG_DATUM(1);
|
||||
|
||||
Datum copyFormatLabelDatum = DirectFunctionCall1(enum_out, copyFormatOidDatum);
|
||||
char *copyFormatLabel = DatumGetCString(copyFormatLabelDatum);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
int32 resultCount = ArrayGetNItems(ARR_NDIM(resultIdObject), ARR_DIMS(
|
||||
resultIdObject));
|
||||
Datum *resultIdArray = DeconstructArrayObject(resultIdObject);
|
||||
|
@ -874,6 +874,8 @@ ReadIntermediateResultsIntoFuncOutput(FunctionCallInfo fcinfo, char *copyFormat,
|
|||
Datum
|
||||
fetch_intermediate_results(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
ArrayType *resultIdObject = PG_GETARG_ARRAYTYPE_P(0);
|
||||
Datum *resultIdArray = DeconstructArrayObject(resultIdObject);
|
||||
int32 resultCount = ArrayObjectCount(resultIdObject);
|
||||
|
@ -885,8 +887,6 @@ fetch_intermediate_results(PG_FUNCTION_ARGS)
|
|||
int resultIndex = 0;
|
||||
int64 totalBytesWritten = 0L;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (resultCount == 0)
|
||||
{
|
||||
PG_RETURN_INT64(0);
|
||||
|
|
|
@ -108,8 +108,15 @@
|
|||
bool EnableLocalExecution = true;
|
||||
bool LogLocalCommands = false;
|
||||
|
||||
int LocalExecutorLevel = 0;
|
||||
|
||||
static LocalExecutionStatus CurrentLocalExecutionStatus = LOCAL_EXECUTION_OPTIONAL;
|
||||
|
||||
static uint64 ExecuteLocalTaskListInternal(List *taskList,
|
||||
ParamListInfo paramListInfo,
|
||||
DistributedPlan *distributedPlan,
|
||||
TupleDestination *defaultTupleDest,
|
||||
bool isUtilityCommand);
|
||||
static void SplitLocalAndRemotePlacements(List *taskPlacementList,
|
||||
List **localTaskPlacementList,
|
||||
List **remoteTaskPlacementList);
|
||||
|
@ -200,10 +207,8 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
TupleDestination *defaultTupleDest,
|
||||
bool isUtilityCommand)
|
||||
{
|
||||
ParamListInfo paramListInfo = copyParamList(orig_paramListInfo);
|
||||
int numParams = 0;
|
||||
Oid *parameterTypes = NULL;
|
||||
uint64 totalRowsProcessed = 0;
|
||||
ParamListInfo paramListInfo = copyParamList(orig_paramListInfo);
|
||||
|
||||
/*
|
||||
* Even if we are executing local tasks, we still enable
|
||||
|
@ -218,6 +223,38 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
*/
|
||||
UseCoordinatedTransaction();
|
||||
|
||||
LocalExecutorLevel++;
|
||||
PG_TRY();
|
||||
{
|
||||
totalRowsProcessed = ExecuteLocalTaskListInternal(taskList, paramListInfo,
|
||||
distributedPlan,
|
||||
defaultTupleDest,
|
||||
isUtilityCommand);
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
LocalExecutorLevel--;
|
||||
|
||||
PG_RE_THROW();
|
||||
}
|
||||
PG_END_TRY();
|
||||
LocalExecutorLevel--;
|
||||
|
||||
return totalRowsProcessed;
|
||||
}
|
||||
|
||||
|
||||
static uint64
|
||||
ExecuteLocalTaskListInternal(List *taskList,
|
||||
ParamListInfo paramListInfo,
|
||||
DistributedPlan *distributedPlan,
|
||||
TupleDestination *defaultTupleDest,
|
||||
bool isUtilityCommand)
|
||||
{
|
||||
uint64 totalRowsProcessed = 0;
|
||||
int numParams = 0;
|
||||
Oid *parameterTypes = NULL;
|
||||
|
||||
if (paramListInfo != NULL)
|
||||
{
|
||||
/* not used anywhere, so declare here */
|
||||
|
@ -229,9 +266,19 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
numParams = paramListInfo->numParams;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use a new memory context that gets reset after every task to free
|
||||
* the deparsed query string and query plan.
|
||||
*/
|
||||
MemoryContext loopContext = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"ExecuteLocalTaskListExtended",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
|
||||
Task *task = NULL;
|
||||
foreach_ptr(task, taskList)
|
||||
{
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(loopContext);
|
||||
|
||||
TupleDestination *tupleDest = task->tupleDest ?
|
||||
task->tupleDest :
|
||||
defaultTupleDest;
|
||||
|
@ -253,7 +300,7 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
* queries are also ReadOnly, our 2PC logic is smart enough to skip sending
|
||||
* PREPARE to those connections.
|
||||
*/
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
Use2PCForCoordinatedTransaction();
|
||||
}
|
||||
|
||||
LogLocalCommand(task);
|
||||
|
@ -261,6 +308,9 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
if (isUtilityCommand)
|
||||
{
|
||||
ExecuteUtilityCommand(TaskQueryString(task));
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
MemoryContextReset(loopContext);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -308,6 +358,9 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
totalRowsProcessed +=
|
||||
LocallyPlanAndExecuteMultipleQueries(queryStringList, tupleDest,
|
||||
task);
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
MemoryContextReset(loopContext);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -343,6 +396,9 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
totalRowsProcessed +=
|
||||
ExecuteLocalTaskPlan(localPlan, shardQueryString,
|
||||
tupleDest, task, paramListInfo);
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
MemoryContextReset(loopContext);
|
||||
}
|
||||
|
||||
return totalRowsProcessed;
|
||||
|
@ -582,6 +638,12 @@ ExecuteLocalTaskPlan(PlannedStmt *taskPlan, char *queryString,
|
|||
|
||||
RecordNonDistTableAccessesForTask(task);
|
||||
|
||||
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"ExecuteLocalTaskPlan",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(localContext);
|
||||
|
||||
/*
|
||||
* Some tuple destinations look at task->taskPlacementList to determine
|
||||
* where the result came from using the placement index. Since a local
|
||||
|
@ -625,6 +687,9 @@ ExecuteLocalTaskPlan(PlannedStmt *taskPlan, char *queryString,
|
|||
|
||||
FreeQueryDesc(queryDesc);
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
MemoryContextDelete(localContext);
|
||||
|
||||
return totalRowsProcessed;
|
||||
}
|
||||
|
||||
|
|
|
@ -107,6 +107,8 @@ PG_FUNCTION_INFO_V1(worker_partition_query_result);
|
|||
Datum
|
||||
worker_partition_query_result(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
ReturnSetInfo *resultInfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
||||
|
||||
text *resultIdPrefixText = PG_GETARG_TEXT_P(0);
|
||||
|
@ -136,8 +138,6 @@ worker_partition_query_result(PG_FUNCTION_ARGS)
|
|||
|
||||
bool binaryCopy = PG_GETARG_BOOL(6);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (!IsMultiStatementTransaction())
|
||||
{
|
||||
ereport(ERROR, (errmsg("worker_partition_query_result can only be used in a "
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "access/htup_details.h"
|
||||
#include "access/skey.h"
|
||||
#include "access/sysattr.h"
|
||||
#include "catalog/catalog.h"
|
||||
#include "catalog/dependency.h"
|
||||
#include "catalog/indexing.h"
|
||||
#include "catalog/pg_class.h"
|
||||
|
@ -379,9 +380,19 @@ DependencyDefinitionFromPgShDepend(ObjectAddress target)
|
|||
/*
|
||||
* Scan pg_shdepend for dbid = $1 AND classid = $2 AND objid = $3 using
|
||||
* pg_shdepend_depender_index
|
||||
*
|
||||
* where $1 is decided as follows:
|
||||
* - shared dependencies $1 = InvalidOid
|
||||
* - other dependencies $1 = MyDatabaseId
|
||||
* This is consistent with postgres' static classIdGetDbId function
|
||||
*/
|
||||
Oid dbid = InvalidOid;
|
||||
if (!IsSharedRelation(target.classId))
|
||||
{
|
||||
dbid = MyDatabaseId;
|
||||
}
|
||||
ScanKeyInit(&key[0], Anum_pg_shdepend_dbid, BTEqualStrategyNumber, F_OIDEQ,
|
||||
MyDatabaseId);
|
||||
ObjectIdGetDatum(dbid));
|
||||
ScanKeyInit(&key[1], Anum_pg_shdepend_classid, BTEqualStrategyNumber, F_OIDEQ,
|
||||
ObjectIdGetDatum(target.classId));
|
||||
ScanKeyInit(&key[2], Anum_pg_shdepend_objid, BTEqualStrategyNumber, F_OIDEQ,
|
||||
|
@ -570,6 +581,12 @@ SupportedDependencyByCitus(const ObjectAddress *address)
|
|||
return true;
|
||||
}
|
||||
|
||||
case OCLASS_DATABASE:
|
||||
{
|
||||
/* only to propagate its owner */
|
||||
return true;
|
||||
}
|
||||
|
||||
case OCLASS_ROLE:
|
||||
{
|
||||
/*
|
||||
|
|
|
@ -2641,6 +2641,8 @@ SecondaryNodeRoleId(void)
|
|||
Datum
|
||||
citus_dist_partition_cache_invalidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TriggerData *triggerData = (TriggerData *) fcinfo->context;
|
||||
Oid oldLogicalRelationId = InvalidOid;
|
||||
Oid newLogicalRelationId = InvalidOid;
|
||||
|
@ -2651,8 +2653,6 @@ citus_dist_partition_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
errmsg("must be called as trigger")));
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (RelationGetRelid(triggerData->tg_relation) != DistPartitionRelationId())
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
|
||||
|
@ -2718,6 +2718,8 @@ master_dist_partition_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_dist_shard_cache_invalidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TriggerData *triggerData = (TriggerData *) fcinfo->context;
|
||||
Oid oldLogicalRelationId = InvalidOid;
|
||||
Oid newLogicalRelationId = InvalidOid;
|
||||
|
@ -2728,8 +2730,6 @@ citus_dist_shard_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
errmsg("must be called as trigger")));
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (RelationGetRelid(triggerData->tg_relation) != DistShardRelationId())
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
|
||||
|
@ -2795,6 +2795,8 @@ master_dist_shard_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_dist_placement_cache_invalidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TriggerData *triggerData = (TriggerData *) fcinfo->context;
|
||||
Oid oldShardId = InvalidOid;
|
||||
Oid newShardId = InvalidOid;
|
||||
|
@ -2805,8 +2807,6 @@ citus_dist_placement_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
errmsg("must be called as trigger")));
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/*
|
||||
* Before 7.0-2 this trigger is on pg_dist_shard_placement,
|
||||
* ignore trigger in this scenario.
|
||||
|
@ -2884,14 +2884,14 @@ master_dist_placement_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_dist_node_cache_invalidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (!CALLED_AS_TRIGGER(fcinfo))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
|
||||
errmsg("must be called as trigger")));
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
CitusInvalidateRelcacheByRelid(DistNodeRelationId());
|
||||
|
||||
PG_RETURN_DATUM(PointerGetDatum(NULL));
|
||||
|
@ -2919,14 +2919,14 @@ master_dist_node_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_conninfo_cache_invalidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (!CALLED_AS_TRIGGER(fcinfo))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
|
||||
errmsg("must be called as trigger")));
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* no-op in community edition */
|
||||
|
||||
PG_RETURN_DATUM(PointerGetDatum(NULL));
|
||||
|
@ -2954,14 +2954,14 @@ master_dist_authinfo_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_dist_local_group_cache_invalidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (!CALLED_AS_TRIGGER(fcinfo))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
|
||||
errmsg("must be called as trigger")));
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
CitusInvalidateRelcacheByRelid(DistLocalGroupIdRelationId());
|
||||
|
||||
PG_RETURN_DATUM(PointerGetDatum(NULL));
|
||||
|
@ -2989,14 +2989,14 @@ master_dist_local_group_cache_invalidate(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_dist_object_cache_invalidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (!CALLED_AS_TRIGGER(fcinfo))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
|
||||
errmsg("must be called as trigger")));
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
CitusInvalidateRelcacheByRelid(DistObjectRelationId());
|
||||
|
||||
PG_RETURN_DATUM(PointerGetDatum(NULL));
|
||||
|
@ -3344,8 +3344,7 @@ GetLocalGroupId(void)
|
|||
return LocalGroupId;
|
||||
}
|
||||
|
||||
Oid localGroupTableOid = get_relname_relid("pg_dist_local_group",
|
||||
PG_CATALOG_NAMESPACE);
|
||||
Oid localGroupTableOid = DistLocalGroupIdRelationId();
|
||||
if (localGroupTableOid == InvalidOid)
|
||||
{
|
||||
return 0;
|
||||
|
|
|
@ -25,11 +25,13 @@
|
|||
#include "access/xact.h"
|
||||
#include "catalog/dependency.h"
|
||||
#include "catalog/indexing.h"
|
||||
#include "catalog/pg_attrdef.h"
|
||||
#include "catalog/pg_depend.h"
|
||||
#include "catalog/pg_foreign_server.h"
|
||||
#include "catalog/pg_namespace.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "commands/async.h"
|
||||
#include "commands/sequence.h"
|
||||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/commands.h"
|
||||
#include "distributed/deparser.h"
|
||||
|
@ -46,10 +48,13 @@
|
|||
#include "distributed/pg_dist_node.h"
|
||||
#include "distributed/remote_commands.h"
|
||||
#include "distributed/worker_manager.h"
|
||||
#include "distributed/worker_protocol.h"
|
||||
#include "distributed/worker_transaction.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "executor/spi.h"
|
||||
#include "foreign/foreign.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/pg_list.h"
|
||||
#include "pgstat.h"
|
||||
#include "postmaster/bgworker.h"
|
||||
|
@ -67,7 +72,6 @@ static List * GetDistributedTableDDLEvents(Oid relationId);
|
|||
static char * LocalGroupIdUpdateCommand(int32 groupId);
|
||||
static void UpdateDistNodeBoolAttr(const char *nodeName, int32 nodePort,
|
||||
int attrNum, bool value);
|
||||
static List * SequenceDDLCommandsForTable(Oid relationId);
|
||||
static List * SequenceDependencyCommandList(Oid relationId);
|
||||
static char * TruncateTriggerCreateCommand(Oid relationId);
|
||||
static char * SchemaOwnerName(Oid objectId);
|
||||
|
@ -103,6 +107,8 @@ static bool got_SIGALRM = false;
|
|||
Datum
|
||||
start_metadata_sync_to_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *nodeName = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
|
||||
|
@ -126,10 +132,10 @@ StartMetadataSyncToNode(const char *nodeNameString, int32 nodePort)
|
|||
/* fail if metadata synchronization doesn't succeed */
|
||||
bool raiseInterrupts = true;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
EnsureSuperUser();
|
||||
EnsureModificationsCanRun();
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
PreventInTransactionBlock(true, "start_metadata_sync_to_node");
|
||||
|
||||
|
@ -185,14 +191,14 @@ StartMetadataSyncToNode(const char *nodeNameString, int32 nodePort)
|
|||
Datum
|
||||
stop_metadata_sync_to_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
EnsureSuperUser();
|
||||
|
||||
text *nodeName = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
char *nodeNameString = text_to_cstring(nodeName);
|
||||
|
||||
EnsureCoordinator();
|
||||
EnsureSuperUser();
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
LockRelationOid(DistNodeRelationId(), ExclusiveLock);
|
||||
|
||||
WorkerNode *workerNode = FindWorkerNode(nodeNameString, nodePort);
|
||||
|
@ -372,11 +378,9 @@ MetadataCreateCommands(void)
|
|||
continue;
|
||||
}
|
||||
|
||||
List *workerSequenceDDLCommands = SequenceDDLCommandsForTable(relationId);
|
||||
List *ddlCommandList = GetFullTableCreationCommands(relationId,
|
||||
includeSequenceDefaults);
|
||||
char *tableOwnerResetCommand = TableOwnerResetCommand(relationId);
|
||||
List *sequenceDependencyCommandList = SequenceDependencyCommandList(relationId);
|
||||
|
||||
/*
|
||||
* Tables might have dependencies on different objects, since we create shards for
|
||||
|
@ -386,6 +390,7 @@ MetadataCreateCommands(void)
|
|||
ObjectAddressSet(tableAddress, RelationRelationId, relationId);
|
||||
EnsureDependenciesExistOnAllNodes(&tableAddress);
|
||||
|
||||
List *workerSequenceDDLCommands = SequenceDDLCommandsForTable(relationId);
|
||||
metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList,
|
||||
workerSequenceDDLCommands);
|
||||
|
||||
|
@ -400,6 +405,9 @@ MetadataCreateCommands(void)
|
|||
|
||||
metadataSnapshotCommandList = lappend(metadataSnapshotCommandList,
|
||||
tableOwnerResetCommand);
|
||||
|
||||
List *sequenceDependencyCommandList = SequenceDependencyCommandList(
|
||||
relationId);
|
||||
metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList,
|
||||
sequenceDependencyCommandList);
|
||||
}
|
||||
|
@ -507,7 +515,8 @@ GetDistributedTableDDLEvents(Oid relationId)
|
|||
}
|
||||
|
||||
/* command to associate sequences with table */
|
||||
List *sequenceDependencyCommandList = SequenceDependencyCommandList(relationId);
|
||||
List *sequenceDependencyCommandList = SequenceDependencyCommandList(
|
||||
relationId);
|
||||
commandList = list_concat(commandList, sequenceDependencyCommandList);
|
||||
}
|
||||
|
||||
|
@ -1040,21 +1049,58 @@ List *
|
|||
SequenceDDLCommandsForTable(Oid relationId)
|
||||
{
|
||||
List *sequenceDDLList = NIL;
|
||||
List *ownedSequences = GetSequencesOwnedByRelation(relationId);
|
||||
|
||||
List *attnumList = NIL;
|
||||
List *dependentSequenceList = NIL;
|
||||
GetDependentSequencesWithRelation(relationId, &attnumList, &dependentSequenceList, 0);
|
||||
|
||||
char *ownerName = TableOwner(relationId);
|
||||
|
||||
Oid sequenceOid = InvalidOid;
|
||||
foreach_oid(sequenceOid, ownedSequences)
|
||||
ListCell *attnumCell = NULL;
|
||||
ListCell *dependentSequenceCell = NULL;
|
||||
forboth(attnumCell, attnumList, dependentSequenceCell, dependentSequenceList)
|
||||
{
|
||||
AttrNumber attnum = lfirst_int(attnumCell);
|
||||
Oid sequenceOid = lfirst_oid(dependentSequenceCell);
|
||||
|
||||
char *sequenceDef = pg_get_sequencedef_string(sequenceOid);
|
||||
char *escapedSequenceDef = quote_literal_cstr(sequenceDef);
|
||||
StringInfo wrappedSequenceDef = makeStringInfo();
|
||||
StringInfo sequenceGrantStmt = makeStringInfo();
|
||||
char *sequenceName = generate_qualified_relation_name(sequenceOid);
|
||||
Form_pg_sequence sequenceData = pg_get_sequencedef(sequenceOid);
|
||||
Oid sequenceTypeOid = sequenceData->seqtypid;
|
||||
Oid sequenceTypeOid = GetAttributeTypeOid(relationId, attnum);
|
||||
char *typeName = format_type_be(sequenceTypeOid);
|
||||
|
||||
/* get sequence address */
|
||||
ObjectAddress sequenceAddress = { 0 };
|
||||
ObjectAddressSet(sequenceAddress, RelationRelationId, sequenceOid);
|
||||
EnsureDependenciesExistOnAllNodes(&sequenceAddress);
|
||||
|
||||
/*
|
||||
* Alter the sequence's data type in the coordinator if needed.
|
||||
* A sequence's type is bigint by default and it doesn't change even if
|
||||
* it's used in an int column. However, when distributing the sequence,
|
||||
* we don't allow incompatible min/max ranges between the coordinator and
|
||||
* workers, so we determine the sequence type here based on its current usage
|
||||
* and propagate that same type to the workers as well.
|
||||
* TODO: move this command to the part where the sequence is
|
||||
* used in a distributed table: both in create_distributed_table
|
||||
* and ALTER TABLE commands that include a sequence default
|
||||
*/
|
||||
Oid currentSequenceTypeOid = sequenceData->seqtypid;
|
||||
if (currentSequenceTypeOid != sequenceTypeOid)
|
||||
{
|
||||
AlterSeqStmt *alterSequenceStatement = makeNode(AlterSeqStmt);
|
||||
char *seqNamespace = get_namespace_name(get_rel_namespace(sequenceOid));
|
||||
char *seqName = get_rel_name(sequenceOid);
|
||||
alterSequenceStatement->sequence = makeRangeVar(seqNamespace, seqName, -1);
|
||||
Node *asTypeNode = (Node *) makeTypeNameFromOid(sequenceTypeOid, -1);
|
||||
SetDefElemArg(alterSequenceStatement, "as", asTypeNode);
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
AlterSequence(pstate, alterSequenceStatement);
|
||||
}
|
||||
|
||||
/* create schema if needed */
|
||||
appendStringInfo(wrappedSequenceDef,
|
||||
WORKER_APPLY_SEQUENCE_COMMAND,
|
||||
|
@ -1067,12 +1113,184 @@ SequenceDDLCommandsForTable(Oid relationId)
|
|||
|
||||
sequenceDDLList = lappend(sequenceDDLList, wrappedSequenceDef->data);
|
||||
sequenceDDLList = lappend(sequenceDDLList, sequenceGrantStmt->data);
|
||||
|
||||
MarkObjectDistributed(&sequenceAddress);
|
||||
}
|
||||
|
||||
return sequenceDDLList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetAttributeTypeOid returns the OID of the type of the attribute of
|
||||
* provided relationId that has the provided attnum
|
||||
*/
|
||||
Oid
|
||||
GetAttributeTypeOid(Oid relationId, AttrNumber attnum)
|
||||
{
|
||||
Oid resultOid = InvalidOid;
|
||||
|
||||
ScanKeyData key[2];
|
||||
|
||||
/* Grab an appropriate lock on the pg_attribute relation */
|
||||
Relation attrel = table_open(AttributeRelationId, AccessShareLock);
|
||||
|
||||
/* Use the index to scan only system attributes of the target relation */
|
||||
ScanKeyInit(&key[0],
|
||||
Anum_pg_attribute_attrelid,
|
||||
BTEqualStrategyNumber, F_OIDEQ,
|
||||
ObjectIdGetDatum(relationId));
|
||||
ScanKeyInit(&key[1],
|
||||
Anum_pg_attribute_attnum,
|
||||
BTLessEqualStrategyNumber, F_INT2LE,
|
||||
Int16GetDatum(attnum));
|
||||
|
||||
SysScanDesc scan = systable_beginscan(attrel, AttributeRelidNumIndexId, true, NULL, 2,
|
||||
key);
|
||||
|
||||
HeapTuple attributeTuple;
|
||||
while (HeapTupleIsValid(attributeTuple = systable_getnext(scan)))
|
||||
{
|
||||
Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple);
|
||||
resultOid = att->atttypid;
|
||||
}
|
||||
|
||||
systable_endscan(scan);
|
||||
table_close(attrel, AccessShareLock);
|
||||
|
||||
return resultOid;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetDependentSequencesWithRelation appends the attnum and id of sequences that
|
||||
* have direct (owned sequences) or indirect dependency with the given relationId,
|
||||
* to the lists passed as NIL initially.
|
||||
* For both cases, we use the intermediate AttrDefault object from pg_depend.
|
||||
* If attnum is specified, we only return the sequences related to that
|
||||
* attribute of the relationId.
|
||||
*/
|
||||
void
|
||||
GetDependentSequencesWithRelation(Oid relationId, List **attnumList,
|
||||
List **dependentSequenceList, AttrNumber attnum)
|
||||
{
|
||||
Assert(*attnumList == NIL && *dependentSequenceList == NIL);
|
||||
|
||||
List *attrdefResult = NIL;
|
||||
List *attrdefAttnumResult = NIL;
|
||||
ScanKeyData key[3];
|
||||
HeapTuple tup;
|
||||
|
||||
Relation depRel = table_open(DependRelationId, AccessShareLock);
|
||||
|
||||
ScanKeyInit(&key[0],
|
||||
Anum_pg_depend_refclassid,
|
||||
BTEqualStrategyNumber, F_OIDEQ,
|
||||
ObjectIdGetDatum(RelationRelationId));
|
||||
ScanKeyInit(&key[1],
|
||||
Anum_pg_depend_refobjid,
|
||||
BTEqualStrategyNumber, F_OIDEQ,
|
||||
ObjectIdGetDatum(relationId));
|
||||
if (attnum)
|
||||
{
|
||||
ScanKeyInit(&key[2],
|
||||
Anum_pg_depend_refobjsubid,
|
||||
BTEqualStrategyNumber, F_INT4EQ,
|
||||
Int32GetDatum(attnum));
|
||||
}
|
||||
|
||||
SysScanDesc scan = systable_beginscan(depRel, DependReferenceIndexId, true,
|
||||
NULL, attnum ? 3 : 2, key);
|
||||
|
||||
while (HeapTupleIsValid(tup = systable_getnext(scan)))
|
||||
{
|
||||
Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup);
|
||||
|
||||
if (deprec->classid == AttrDefaultRelationId &&
|
||||
deprec->objsubid == 0 &&
|
||||
deprec->refobjsubid != 0 &&
|
||||
deprec->deptype == DEPENDENCY_AUTO)
|
||||
{
|
||||
attrdefResult = lappend_oid(attrdefResult, deprec->objid);
|
||||
attrdefAttnumResult = lappend_int(attrdefAttnumResult, deprec->refobjsubid);
|
||||
}
|
||||
}
|
||||
|
||||
systable_endscan(scan);
|
||||
|
||||
table_close(depRel, AccessShareLock);
|
||||
|
||||
ListCell *attrdefOidCell = NULL;
|
||||
ListCell *attrdefAttnumCell = NULL;
|
||||
forboth(attrdefOidCell, attrdefResult, attrdefAttnumCell, attrdefAttnumResult)
|
||||
{
|
||||
Oid attrdefOid = lfirst_oid(attrdefOidCell);
|
||||
AttrNumber attrdefAttnum = lfirst_int(attrdefAttnumCell);
|
||||
|
||||
List *sequencesFromAttrDef = GetSequencesFromAttrDef(attrdefOid);
|
||||
|
||||
/* to simplify and eliminate cases like "DEFAULT nextval('..') - nextval('..')" */
|
||||
if (list_length(sequencesFromAttrDef) > 1)
|
||||
{
|
||||
ereport(ERROR, (errmsg("More than one sequence in a column default"
|
||||
" is not supported for distribution")));
|
||||
}
|
||||
|
||||
if (list_length(sequencesFromAttrDef) == 1)
|
||||
{
|
||||
*dependentSequenceList = list_concat(*dependentSequenceList,
|
||||
sequencesFromAttrDef);
|
||||
*attnumList = lappend_int(*attnumList, attrdefAttnum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetSequencesFromAttrDef returns a list of sequence OIDs that have
|
||||
* dependency with the given attrdefOid in pg_depend
|
||||
*/
|
||||
List *
|
||||
GetSequencesFromAttrDef(Oid attrdefOid)
|
||||
{
|
||||
List *sequencesResult = NIL;
|
||||
ScanKeyData key[2];
|
||||
HeapTuple tup;
|
||||
|
||||
Relation depRel = table_open(DependRelationId, AccessShareLock);
|
||||
|
||||
ScanKeyInit(&key[0],
|
||||
Anum_pg_depend_classid,
|
||||
BTEqualStrategyNumber, F_OIDEQ,
|
||||
ObjectIdGetDatum(AttrDefaultRelationId));
|
||||
ScanKeyInit(&key[1],
|
||||
Anum_pg_depend_objid,
|
||||
BTEqualStrategyNumber, F_OIDEQ,
|
||||
ObjectIdGetDatum(attrdefOid));
|
||||
|
||||
SysScanDesc scan = systable_beginscan(depRel, DependDependerIndexId, true,
|
||||
NULL, 2, key);
|
||||
|
||||
while (HeapTupleIsValid(tup = systable_getnext(scan)))
|
||||
{
|
||||
Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup);
|
||||
|
||||
if (deprec->refclassid == RelationRelationId &&
|
||||
deprec->deptype == DEPENDENCY_NORMAL &&
|
||||
get_rel_relkind(deprec->refobjid) == RELKIND_SEQUENCE)
|
||||
{
|
||||
sequencesResult = lappend_oid(sequencesResult, deprec->refobjid);
|
||||
}
|
||||
}
|
||||
|
||||
systable_endscan(scan);
|
||||
|
||||
table_close(depRel, AccessShareLock);
|
||||
|
||||
return sequencesResult;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SequenceDependencyCommandList generates commands to record the dependency
|
||||
* of sequences on tables on the worker. This dependency does not exist by
|
||||
|
|
|
@ -270,13 +270,13 @@ citus_shard_sizes(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_total_relation_size(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
bool failOnError = PG_GETARG_BOOL(1);
|
||||
|
||||
SizeQueryType sizeQueryType = TOTAL_RELATION_SIZE;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (CStoreTable(relationId))
|
||||
{
|
||||
sizeQueryType = CSTORE_TABLE_SIZE;
|
||||
|
@ -301,12 +301,12 @@ citus_total_relation_size(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_table_size(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
bool failOnError = true;
|
||||
SizeQueryType sizeQueryType = TABLE_SIZE;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (CStoreTable(relationId))
|
||||
{
|
||||
sizeQueryType = CSTORE_TABLE_SIZE;
|
||||
|
@ -331,12 +331,12 @@ citus_table_size(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_relation_size(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
bool failOnError = true;
|
||||
SizeQueryType sizeQueryType = RELATION_SIZE;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (CStoreTable(relationId))
|
||||
{
|
||||
sizeQueryType = CSTORE_TABLE_SIZE;
|
||||
|
@ -644,7 +644,19 @@ DistributedTableSizeOnWorker(WorkerNode *workerNode, Oid relationId,
|
|||
StringInfo tableSizeStringInfo = (StringInfo) linitial(sizeList);
|
||||
char *tableSizeString = tableSizeStringInfo->data;
|
||||
|
||||
if (strlen(tableSizeString) > 0)
|
||||
{
|
||||
*tableSize = SafeStringToUint64(tableSizeString);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* This means the shard is moved or dropped while citus_total_relation_size is
|
||||
* being executed. For this case we get an empty string as table size.
|
||||
* We can take that as zero to prevent any unnecessary errors.
|
||||
*/
|
||||
*tableSize = 0;
|
||||
}
|
||||
|
||||
PQclear(result);
|
||||
ClearResults(connection, failOnError);
|
||||
|
@ -1288,6 +1300,26 @@ ShardLength(uint64 shardId)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* NodeGroupHasLivePlacements returns true if there is any placement
|
||||
* on the given node group which is not a SHARD_STATE_TO_DELETE placement.
|
||||
*/
|
||||
bool
|
||||
NodeGroupHasLivePlacements(int32 groupId)
|
||||
{
|
||||
List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId);
|
||||
GroupShardPlacement *placement = NULL;
|
||||
foreach_ptr(placement, shardPlacements)
|
||||
{
|
||||
if (placement->shardState != SHARD_STATE_TO_DELETE)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* NodeGroupHasShardPlacements returns whether any active shards are placed on the group
|
||||
*/
|
||||
|
|
|
@ -112,7 +112,7 @@ static bool UnsetMetadataSyncedForAll(void);
|
|||
static void ErrorIfCoordinatorMetadataSetFalse(WorkerNode *workerNode, Datum value,
|
||||
char *field);
|
||||
static WorkerNode * SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards);
|
||||
|
||||
static void RemoveOldShardPlacementForNodeGroup(int groupId);
|
||||
|
||||
/* declarations for dynamic loading */
|
||||
PG_FUNCTION_INFO_V1(citus_set_coordinator_host);
|
||||
|
@ -161,6 +161,8 @@ DefaultNodeMetadata()
|
|||
Datum
|
||||
citus_set_coordinator_host(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *nodeName = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
char *nodeNameString = text_to_cstring(nodeName);
|
||||
|
@ -173,8 +175,6 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS)
|
|||
Name nodeClusterName = PG_GETARG_NAME(3);
|
||||
nodeMetadata.nodeCluster = NameStr(*nodeClusterName);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* prevent concurrent modification */
|
||||
LockRelationOid(DistNodeRelationId(), RowShareLock);
|
||||
|
||||
|
@ -219,6 +219,8 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_add_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *nodeName = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
char *nodeNameString = text_to_cstring(nodeName);
|
||||
|
@ -227,8 +229,6 @@ citus_add_node(PG_FUNCTION_ARGS)
|
|||
bool nodeAlreadyExists = false;
|
||||
nodeMetadata.groupId = PG_GETARG_INT32(2);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/*
|
||||
* During tests this function is called before nodeRole and nodeCluster have been
|
||||
* created.
|
||||
|
@ -288,6 +288,8 @@ master_add_node(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_add_inactive_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *nodeName = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
char *nodeNameString = text_to_cstring(nodeName);
|
||||
|
@ -299,8 +301,6 @@ citus_add_inactive_node(PG_FUNCTION_ARGS)
|
|||
nodeMetadata.nodeRole = PG_GETARG_OID(3);
|
||||
nodeMetadata.nodeCluster = NameStr(*nodeClusterName);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (nodeMetadata.groupId == COORDINATOR_GROUP_ID)
|
||||
{
|
||||
ereport(ERROR, (errmsg("coordinator node cannot be added as inactive node")));
|
||||
|
@ -331,6 +331,8 @@ master_add_inactive_node(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_add_secondary_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *nodeName = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
char *nodeNameString = text_to_cstring(nodeName);
|
||||
|
@ -348,8 +350,6 @@ citus_add_secondary_node(PG_FUNCTION_ARGS)
|
|||
nodeMetadata.nodeRole = SecondaryNodeRoleId();
|
||||
nodeMetadata.isActive = true;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
int nodeId = AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata,
|
||||
&nodeAlreadyExists);
|
||||
TransactionModifiedNodeMetadata = true;
|
||||
|
@ -380,11 +380,11 @@ master_add_secondary_node(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_remove_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *nodeNameText = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
RemoveNodeFromCluster(text_to_cstring(nodeNameText), nodePort);
|
||||
TransactionModifiedNodeMetadata = true;
|
||||
|
||||
|
@ -631,7 +631,6 @@ static WorkerNode *
|
|||
ModifiableWorkerNode(const char *nodeName, int32 nodePort)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
EnsureCoordinator();
|
||||
|
||||
/* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */
|
||||
|
@ -843,6 +842,8 @@ ActivateNode(char *nodeName, int nodePort)
|
|||
Datum
|
||||
citus_update_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
int32 nodeId = PG_GETARG_INT32(0);
|
||||
|
||||
text *newNodeName = PG_GETARG_TEXT_P(1);
|
||||
|
@ -864,8 +865,6 @@ citus_update_node(PG_FUNCTION_ARGS)
|
|||
List *placementList = NIL;
|
||||
BackgroundWorkerHandle *handle = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
WorkerNode *workerNodeWithSameAddress = FindWorkerNodeAnyCluster(newNodeNameString,
|
||||
newNodePort);
|
||||
if (workerNodeWithSameAddress != NULL)
|
||||
|
@ -1077,10 +1076,10 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort)
|
|||
Datum
|
||||
get_shard_id_for_distribution_column(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ShardInterval *shardInterval = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
ShardInterval *shardInterval = NULL;
|
||||
|
||||
/*
|
||||
* To have optional parameter as NULL, we defined this UDF as not strict, therefore
|
||||
* we need to check all parameters for NULL values.
|
||||
|
@ -1291,11 +1290,9 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
|
|||
*/
|
||||
DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId);
|
||||
}
|
||||
bool onlyConsiderActivePlacements = false;
|
||||
if (NodeGroupHasShardPlacements(workerNode->groupId,
|
||||
onlyConsiderActivePlacements))
|
||||
if (NodeGroupHasLivePlacements(workerNode->groupId))
|
||||
{
|
||||
if (ClusterHasReferenceTable())
|
||||
if (ActivePrimaryNodeCount() == 1 && ClusterHasReferenceTable())
|
||||
{
|
||||
ereport(ERROR, (errmsg(
|
||||
"cannot remove the last worker node because there are reference "
|
||||
|
@ -1320,6 +1317,8 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
|
|||
|
||||
DeleteNodeRow(workerNode->workerName, nodePort);
|
||||
|
||||
RemoveOldShardPlacementForNodeGroup(workerNode->groupId);
|
||||
|
||||
char *nodeDeleteCommand = NodeDeleteCommand(workerNode->nodeId);
|
||||
|
||||
/* make sure we don't have any lingering session lifespan connections */
|
||||
|
@ -1329,6 +1328,29 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* RemoveOldShardPlacementForNodeGroup removes all old shard placements
|
||||
* for the given node group from pg_dist_placement.
|
||||
*/
|
||||
static void
|
||||
RemoveOldShardPlacementForNodeGroup(int groupId)
|
||||
{
|
||||
/*
|
||||
* Prevent concurrent deferred drop
|
||||
*/
|
||||
LockPlacementCleanup();
|
||||
List *shardPlacementsOnNode = AllShardPlacementsOnNodeGroup(groupId);
|
||||
GroupShardPlacement *placement = NULL;
|
||||
foreach_ptr(placement, shardPlacementsOnNode)
|
||||
{
|
||||
if (placement->shardState == SHARD_STATE_TO_DELETE)
|
||||
{
|
||||
DeleteShardPlacementRow(placement->placementId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CanRemoveReferenceTablePlacements returns true if active primary
|
||||
* node count is more than 1, which means that even if we remove a node
|
||||
|
@ -1818,7 +1840,7 @@ InsertPlaceholderCoordinatorRecord(void)
|
|||
bool nodeAlreadyExists = false;
|
||||
|
||||
/* as long as there is a single node, localhost should be ok */
|
||||
AddNodeMetadata("localhost", PostPortNumber, &nodeMetadata, &nodeAlreadyExists);
|
||||
AddNodeMetadata(LocalHostName, PostPortNumber, &nodeMetadata, &nodeAlreadyExists);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -49,12 +49,12 @@ PG_FUNCTION_INFO_V1(citus_create_restore_point);
|
|||
Datum
|
||||
citus_create_restore_point(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *restoreNameText = PG_GETARG_TEXT_P(0);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureSuperUser();
|
||||
EnsureCoordinator();
|
||||
|
||||
text *restoreNameText = PG_GETARG_TEXT_P(0);
|
||||
|
||||
if (RecoveryInProgress())
|
||||
{
|
||||
ereport(ERROR,
|
||||
|
|
|
@ -71,14 +71,14 @@ static Tuplestorestate * CreateTupleStore(TupleDesc tupleDescriptor,
|
|||
Datum
|
||||
master_run_on_worker(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
||||
bool parallelExecution = false;
|
||||
StringInfo *nodeNameArray = NULL;
|
||||
int *nodePortArray = NULL;
|
||||
StringInfo *commandStringArray = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* check to see if caller supports us returning a tuplestore */
|
||||
if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
|
||||
{
|
||||
|
|
|
@ -64,6 +64,9 @@ PG_FUNCTION_INFO_V1(master_create_worker_shards);
|
|||
Datum
|
||||
master_create_worker_shards(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
text *tableNameText = PG_GETARG_TEXT_P(0);
|
||||
int32 shardCount = PG_GETARG_INT32(1);
|
||||
int32 replicationFactor = PG_GETARG_INT32(2);
|
||||
|
@ -74,9 +77,6 @@ master_create_worker_shards(PG_FUNCTION_ARGS)
|
|||
/* do not add any data */
|
||||
bool useExclusiveConnections = false;
|
||||
|
||||
EnsureCoordinator();
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/*
|
||||
* distributed tables might have dependencies on different objects, since we create
|
||||
* shards for a distributed table via multiple sessions these objects will be created
|
||||
|
|
|
@ -109,6 +109,9 @@ PG_FUNCTION_INFO_V1(master_drop_sequences);
|
|||
Datum
|
||||
master_apply_delete_command(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
text *queryText = PG_GETARG_TEXT_P(0);
|
||||
char *queryString = text_to_cstring(queryText);
|
||||
List *deletableShardIntervalList = NIL;
|
||||
|
@ -116,9 +119,6 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
|
|||
RawStmt *rawStmt = (RawStmt *) ParseTreeRawStmt(queryString);
|
||||
Node *queryTreeNode = rawStmt->stmt;
|
||||
|
||||
EnsureCoordinator();
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (!IsA(queryTreeNode, DeleteStmt))
|
||||
{
|
||||
ereport(ERROR, (errmsg("query \"%s\" is not a delete statement",
|
||||
|
@ -208,6 +208,8 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_drop_all_shards(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
text *schemaNameText = PG_GETARG_TEXT_P(1);
|
||||
text *relationNameText = PG_GETARG_TEXT_P(2);
|
||||
|
@ -215,8 +217,6 @@ citus_drop_all_shards(PG_FUNCTION_ARGS)
|
|||
char *schemaName = text_to_cstring(schemaNameText);
|
||||
char *relationName = text_to_cstring(relationNameText);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/*
|
||||
* The SQL_DROP trigger calls this function even for tables that are
|
||||
* not distributed. In that case, silently ignore and return -1.
|
||||
|
@ -325,7 +325,7 @@ DropShards(Oid relationId, char *schemaName, char *relationName,
|
|||
*/
|
||||
if (MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
|
||||
{
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
Use2PCForCoordinatedTransaction();
|
||||
}
|
||||
|
||||
List *dropTaskList = DropTaskList(relationId, schemaName, relationName,
|
||||
|
|
|
@ -70,13 +70,13 @@ PG_FUNCTION_INFO_V1(master_modify_multiple_shards);
|
|||
Datum
|
||||
master_modify_multiple_shards(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *queryText = PG_GETARG_TEXT_P(0);
|
||||
char *queryString = text_to_cstring(queryText);
|
||||
RawStmt *rawStmt = (RawStmt *) ParseTreeRawStmt(queryString);
|
||||
Node *queryTreeNode = rawStmt->stmt;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (!IsA(queryTreeNode, DeleteStmt) && !IsA(queryTreeNode, UpdateStmt))
|
||||
{
|
||||
ereport(ERROR, (errmsg("query \"%s\" is not a delete or update "
|
||||
|
|
|
@ -74,10 +74,13 @@ int ShardPlacementPolicy = SHARD_PLACEMENT_ROUND_ROBIN;
|
|||
int NextShardId = 0;
|
||||
int NextPlacementId = 0;
|
||||
|
||||
static List * GetTableReplicaIdentityCommand(Oid relationId);
|
||||
static void GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity(Form_pg_index
|
||||
indexForm,
|
||||
List **
|
||||
indexDDLEventList,
|
||||
int indexFlags);
|
||||
static Datum WorkerNodeGetDatum(WorkerNode *workerNode, TupleDesc tupleDescriptor);
|
||||
static void GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm,
|
||||
List **indexDDLEventList);
|
||||
|
||||
|
||||
/* exports for SQL callable functions */
|
||||
PG_FUNCTION_INFO_V1(master_get_table_metadata);
|
||||
|
@ -100,6 +103,8 @@ PG_FUNCTION_INFO_V1(master_stage_shard_placement_row);
|
|||
Datum
|
||||
master_get_table_metadata(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *relationName = PG_GETARG_TEXT_P(0);
|
||||
Oid relationId = ResolveRelationId(relationName, false);
|
||||
|
||||
|
@ -109,8 +114,6 @@ master_get_table_metadata(PG_FUNCTION_ARGS)
|
|||
Datum values[TABLE_METADATA_FIELDS];
|
||||
bool isNulls[TABLE_METADATA_FIELDS];
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* find partition tuple for partitioned relation */
|
||||
CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId);
|
||||
|
||||
|
@ -198,11 +201,11 @@ CStoreTable(Oid relationId)
|
|||
Datum
|
||||
master_get_table_ddl_events(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
FuncCallContext *functionContext = NULL;
|
||||
ListCell *tableDDLEventCell = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/*
|
||||
* On the very first call to this function, we first use the given relation
|
||||
* name to get to the relation. We then recreate the list of DDL statements
|
||||
|
@ -273,8 +276,8 @@ master_get_table_ddl_events(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
master_get_new_shardid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
EnsureCoordinator();
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
uint64 shardId = GetNextShardId();
|
||||
Datum shardIdDatum = Int64GetDatum(shardId);
|
||||
|
@ -343,8 +346,8 @@ GetNextShardId()
|
|||
Datum
|
||||
master_get_new_placementid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
EnsureCoordinator();
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
uint64 placementId = GetNextPlacementId();
|
||||
Datum placementIdDatum = Int64GetDatum(placementId);
|
||||
|
@ -450,11 +453,11 @@ master_stage_shard_placement_row(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_get_active_worker_nodes(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
FuncCallContext *functionContext = NULL;
|
||||
uint32 workerNodeCount = 0;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
/* create a function context for cross-call persistence */
|
||||
|
@ -549,7 +552,7 @@ GetFullTableCreationCommands(Oid relationId, bool includeSequenceDefaults)
|
|||
tableDDLEventList = list_concat(tableDDLEventList, preLoadCreationCommandList);
|
||||
|
||||
List *postLoadCreationCommandList =
|
||||
GetPostLoadTableCreationCommands(relationId, true);
|
||||
GetPostLoadTableCreationCommands(relationId, true, true);
|
||||
|
||||
tableDDLEventList = list_concat(tableDDLEventList, postLoadCreationCommandList);
|
||||
|
||||
|
@ -562,19 +565,43 @@ GetFullTableCreationCommands(Oid relationId, bool includeSequenceDefaults)
|
|||
* of DDL commands that should be applied after loading the data.
|
||||
*/
|
||||
List *
|
||||
GetPostLoadTableCreationCommands(Oid relationId, bool includeIndexes)
|
||||
GetPostLoadTableCreationCommands(Oid relationId, bool includeIndexes,
|
||||
bool includeReplicaIdentity)
|
||||
{
|
||||
List *tableDDLEventList = NIL;
|
||||
|
||||
if (includeIndexes)
|
||||
/*
|
||||
* Include all the commands (e.g., create index, set index clustered
|
||||
* and set index statistics) regarding the indexes. Note that
|
||||
* running all these commands in parallel might fail as the
|
||||
* latter two depend on the first one. So, the caller should
|
||||
* execute the commands sequentially.
|
||||
*/
|
||||
int indexFlags = INCLUDE_INDEX_ALL_STATEMENTS;
|
||||
|
||||
if (includeIndexes && includeReplicaIdentity)
|
||||
{
|
||||
List *indexAndConstraintCommandList =
|
||||
GetTableIndexAndConstraintCommands(relationId);
|
||||
GetTableIndexAndConstraintCommands(relationId, indexFlags);
|
||||
tableDDLEventList = list_concat(tableDDLEventList, indexAndConstraintCommandList);
|
||||
}
|
||||
else if (includeIndexes && !includeReplicaIdentity)
|
||||
{
|
||||
/*
|
||||
* Do not include the indexes/constraints that back
|
||||
* replica identity, if any.
|
||||
*/
|
||||
List *indexAndConstraintCommandList =
|
||||
GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId,
|
||||
indexFlags);
|
||||
tableDDLEventList = list_concat(tableDDLEventList, indexAndConstraintCommandList);
|
||||
}
|
||||
|
||||
if (includeReplicaIdentity)
|
||||
{
|
||||
List *replicaIdentityEvents = GetTableReplicaIdentityCommand(relationId);
|
||||
tableDDLEventList = list_concat(tableDDLEventList, replicaIdentityEvents);
|
||||
}
|
||||
|
||||
List *triggerCommands = GetExplicitTriggerCommandList(relationId);
|
||||
tableDDLEventList = list_concat(tableDDLEventList, triggerCommands);
|
||||
|
@ -590,7 +617,7 @@ GetPostLoadTableCreationCommands(Oid relationId, bool includeIndexes)
|
|||
* GetTableReplicaIdentityCommand returns the list of DDL commands to
|
||||
* (re)define the replica identity choice for a given table.
|
||||
*/
|
||||
static List *
|
||||
List *
|
||||
GetTableReplicaIdentityCommand(Oid relationId)
|
||||
{
|
||||
List *replicaIdentityCreateCommandList = NIL;
|
||||
|
@ -694,18 +721,82 @@ GetPreLoadTableCreationCommands(Oid relationId, bool includeSequenceDefaults,
|
|||
* (re)create indexes and constraints for a given table.
|
||||
*/
|
||||
List *
|
||||
GetTableIndexAndConstraintCommands(Oid relationId)
|
||||
GetTableIndexAndConstraintCommands(Oid relationId, int indexFlags)
|
||||
{
|
||||
return ExecuteFunctionOnEachTableIndex(relationId,
|
||||
GatherIndexAndConstraintDefinitionList);
|
||||
GatherIndexAndConstraintDefinitionList,
|
||||
indexFlags);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
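* GetTableIndexAndConstraintCommandsExcludingReplicaIdentity returns the list
|
||||
* of DDL commands to (re)create indexes and constraints for a given table,
|
||||
* excluding the index or constraint that backs the replica identity.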
|
||||
*/
|
||||
List *
|
||||
GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(Oid relationId, int indexFlags)
|
||||
{
|
||||
return ExecuteFunctionOnEachTableIndex(relationId,
|
||||
GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity,
|
||||
indexFlags);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity is a wrapper around
|
||||
* GatherIndexAndConstraintDefinitionList(), which only excludes the indexes or
|
||||
* constraints that back the replica identity.
|
||||
*/
|
||||
static void
|
||||
GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity(Form_pg_index indexForm,
|
||||
List **indexDDLEventList,
|
||||
int indexFlags)
|
||||
{
|
||||
Oid relationId = indexForm->indrelid;
|
||||
Relation relation = table_open(relationId, AccessShareLock);
|
||||
|
||||
Oid replicaIdentityIndex = GetRelationIdentityOrPK(relation);
|
||||
|
||||
if (replicaIdentityIndex == indexForm->indexrelid)
|
||||
{
|
||||
/* this index is backing the replica identity, so skip */
|
||||
table_close(relation, NoLock);
|
||||
return;
|
||||
}
|
||||
|
||||
GatherIndexAndConstraintDefinitionList(indexForm, indexDDLEventList, indexFlags);
|
||||
|
||||
table_close(relation, NoLock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetRelationIdentityOrPK returns the replica identity index of the relation
|
||||
* or, if it is not defined, the primary key index.
|
||||
*
|
||||
* If neither is defined, returns InvalidOid.
|
||||
*
|
||||
* Inspired from postgres/src/backend/replication/logical/worker.c
|
||||
*/
|
||||
Oid
|
||||
GetRelationIdentityOrPK(Relation rel)
|
||||
{
|
||||
Oid idxoid = RelationGetReplicaIndex(rel);
|
||||
|
||||
if (!OidIsValid(idxoid))
|
||||
{
|
||||
idxoid = RelationGetPrimaryKeyIndex(rel);
|
||||
}
|
||||
|
||||
return idxoid;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GatherIndexAndConstraintDefinitionList adds the DDL command for the given index.
|
||||
*/
|
||||
static void
|
||||
GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, List **indexDDLEventList)
|
||||
void
|
||||
GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, List **indexDDLEventList,
|
||||
int indexFlags)
|
||||
{
|
||||
Oid indexId = indexForm->indexrelid;
|
||||
char *statementDef = NULL;
|
||||
|
@ -726,11 +817,15 @@ GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, List **indexDDLE
|
|||
}
|
||||
|
||||
/* append found constraint or index definition to the list */
|
||||
if (indexFlags & INCLUDE_CREATE_INDEX_STATEMENTS)
|
||||
{
|
||||
*indexDDLEventList = lappend(*indexDDLEventList, makeTableDDLCommandString(
|
||||
statementDef));
|
||||
}
|
||||
|
||||
/* if table is clustered on this index, append definition to the list */
|
||||
if (indexForm->indisclustered)
|
||||
if ((indexFlags & INCLUDE_INDEX_CLUSTERED_STATEMENTS) &&
|
||||
indexForm->indisclustered)
|
||||
{
|
||||
char *clusteredDef = pg_get_indexclusterdef_string(indexId);
|
||||
Assert(clusteredDef != NULL);
|
||||
|
@ -740,8 +835,12 @@ GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, List **indexDDLE
|
|||
}
|
||||
|
||||
/* we need alter index commands for altered targets on expression indexes */
|
||||
if (indexFlags & INCLUDE_INDEX_STATISTICS_STATEMENTTS)
|
||||
{
|
||||
List *alterIndexStatisticsCommands = GetAlterIndexStatisticsCommands(indexId);
|
||||
*indexDDLEventList = list_concat(*indexDDLEventList, alterIndexStatisticsCommands);
|
||||
*indexDDLEventList = list_concat(*indexDDLEventList,
|
||||
alterIndexStatisticsCommands);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -35,10 +35,10 @@ PG_FUNCTION_INFO_V1(time_partition_range);
|
|||
Datum
|
||||
time_partition_range(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
/* create tuple descriptor for return value */
|
||||
TupleDesc metadataDescriptor = NULL;
|
||||
TypeFuncClass resultTypeClass = get_call_result_type(fcinfo, NULL,
|
||||
|
|
|
@ -63,7 +63,10 @@ static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName
|
|||
char shardReplicationMode);
|
||||
static void CopyShardTables(List *shardIntervalList, char *sourceNodeName,
|
||||
int32 sourceNodePort, char *targetNodeName,
|
||||
int32 targetNodePort);
|
||||
int32 targetNodePort, bool useLogicalReplication);
|
||||
static void CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName,
|
||||
int32 sourceNodePort,
|
||||
char *targetNodeName, int32 targetNodePort);
|
||||
static List * CopyPartitionShardsCommandList(ShardInterval *shardInterval,
|
||||
const char *sourceNodeName,
|
||||
int32 sourceNodePort);
|
||||
|
@ -93,6 +96,7 @@ static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
|
|||
char *targetNodeName, uint32
|
||||
targetNodePort);
|
||||
|
||||
|
||||
/* declarations for dynamic loading */
|
||||
PG_FUNCTION_INFO_V1(citus_copy_shard_placement);
|
||||
PG_FUNCTION_INFO_V1(master_copy_shard_placement);
|
||||
|
@ -118,6 +122,9 @@ bool CheckAvailableSpaceBeforeMove = true;
|
|||
Datum
|
||||
citus_copy_shard_placement(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
int64 shardId = PG_GETARG_INT64(0);
|
||||
text *sourceNodeNameText = PG_GETARG_TEXT_P(1);
|
||||
int32 sourceNodePort = PG_GETARG_INT32(2);
|
||||
|
@ -129,9 +136,6 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS)
|
|||
char *sourceNodeName = text_to_cstring(sourceNodeNameText);
|
||||
char *targetNodeName = text_to_cstring(targetNodeNameText);
|
||||
|
||||
EnsureCoordinator();
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
|
||||
if (shardReplicationMode == TRANSFER_MODE_FORCE_LOGICAL)
|
||||
{
|
||||
|
@ -279,6 +283,9 @@ CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes)
|
|||
Datum
|
||||
citus_move_shard_placement(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
int64 shardId = PG_GETARG_INT64(0);
|
||||
char *sourceNodeName = text_to_cstring(PG_GETARG_TEXT_P(1));
|
||||
int32 sourceNodePort = PG_GETARG_INT32(2);
|
||||
|
@ -290,12 +297,9 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
|
|||
ListCell *colocatedTableCell = NULL;
|
||||
ListCell *colocatedShardCell = NULL;
|
||||
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
Oid relationId = RelationIdForShard(shardId);
|
||||
ErrorIfMoveCitusLocalTable(relationId);
|
||||
ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort);
|
||||
|
||||
ShardInterval *shardInterval = LoadShardInterval(shardId);
|
||||
Oid distributedTableId = shardInterval->relationId;
|
||||
|
@ -359,8 +363,9 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
|
|||
* CopyShardTables function copies given shard with its co-located
|
||||
* shards.
|
||||
*/
|
||||
bool useLogicalReplication = false;
|
||||
CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName,
|
||||
targetNodePort);
|
||||
targetNodePort, useLogicalReplication);
|
||||
|
||||
ShardInterval *colocatedShard = NULL;
|
||||
foreach_ptr(colocatedShard, colocatedShardList)
|
||||
|
@ -417,6 +422,51 @@ EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not
|
||||
* eligible for moving shards.
|
||||
*/
|
||||
void
|
||||
ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
|
||||
{
|
||||
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
|
||||
if (workerNode == NULL)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Moving shards to a non-existing node is not supported"),
|
||||
errhint(
|
||||
"Add the target node via SELECT citus_add_node('%s', %d);",
|
||||
targetNodeName, targetNodePort)));
|
||||
}
|
||||
|
||||
if (!workerNode->isActive)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Moving shards to a non-active node is not supported"),
|
||||
errhint(
|
||||
"Activate the target node via SELECT citus_activate_node('%s', %d);",
|
||||
targetNodeName, targetNodePort)));
|
||||
}
|
||||
|
||||
if (!workerNode->shouldHaveShards)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Moving shards to a node that shouldn't have a shard is "
|
||||
"not supported"),
|
||||
errhint("Allow shards on the target node via "
|
||||
"SELECT * FROM citus_set_node_property('%s', %d, 'shouldhaveshards', true);",
|
||||
targetNodeName, targetNodePort)));
|
||||
}
|
||||
|
||||
if (!NodeIsPrimary(workerNode))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Moving shards to a secondary (e.g., replica) node is "
|
||||
"not supported")));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* master_move_shard_placement is a wrapper around citus_move_shard_placement.
|
||||
*/
|
||||
|
@ -741,8 +791,9 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
|
|||
EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode);
|
||||
}
|
||||
|
||||
bool useLogicalReplication = false;
|
||||
CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort);
|
||||
targetNodeName, targetNodePort, useLogicalReplication);
|
||||
|
||||
/*
|
||||
* Finally insert the placements to pg_dist_placement and sync it to the
|
||||
|
@ -820,32 +871,51 @@ EnsureTableListSuitableForReplication(List *tableIdList)
|
|||
|
||||
|
||||
/*
|
||||
* CopyColocatedShardPlacement copies a shard along with its co-located shards
|
||||
* from a source node to target node. It does not make any checks about state
|
||||
* of the shards. It is caller's responsibility to make those checks if they are
|
||||
* necessary.
|
||||
* CopyShardTables copies a shard along with its co-located shards from a source
|
||||
* node to target node. It does not make any checks about state of the shards.
|
||||
* It is caller's responsibility to make those checks if they are necessary.
|
||||
*/
|
||||
static void
|
||||
CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort,
|
||||
char *targetNodeName, int32 targetNodePort)
|
||||
char *targetNodeName, int32 targetNodePort, bool useLogicalReplication)
|
||||
{
|
||||
ShardInterval *shardInterval = NULL;
|
||||
if (list_length(shardIntervalList) < 1)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (useLogicalReplication)
|
||||
{
|
||||
/* only supported in Citus enterprise */
|
||||
}
|
||||
else
|
||||
{
|
||||
CopyShardTablesViaBlockWrites(shardIntervalList, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CopyShardTablesViaBlockWrites copies a shard along with its co-located shards
|
||||
* from a source node to target node via COPY command. While the command is in
|
||||
* progress, the modifications on the source node are blocked.
|
||||
*/
|
||||
static void
|
||||
CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName,
|
||||
int32 sourceNodePort, char *targetNodeName,
|
||||
int32 targetNodePort)
|
||||
{
|
||||
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"CopyShardTables",
|
||||
"CopyShardTablesViaBlockWrites",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(localContext);
|
||||
|
||||
/* iterate through the colocated shards and copy each */
|
||||
ShardInterval *shardInterval = NULL;
|
||||
foreach_ptr(shardInterval, shardIntervalList)
|
||||
{
|
||||
bool includeDataCopy = true;
|
||||
|
||||
if (PartitionedTable(shardInterval->relationId))
|
||||
{
|
||||
/* partitioned tables contain no data */
|
||||
includeDataCopy = false;
|
||||
}
|
||||
bool includeDataCopy = !PartitionedTable(shardInterval->relationId);
|
||||
|
||||
List *ddlCommandList = CopyShardCommandList(shardInterval, sourceNodeName,
|
||||
sourceNodePort, includeDataCopy);
|
||||
|
@ -853,10 +923,9 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
|
|||
|
||||
SendCommandListToWorkerInSingleTransaction(targetNodeName, targetNodePort,
|
||||
tableOwner, ddlCommandList);
|
||||
}
|
||||
|
||||
MemoryContextReset(localContext);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Once all shards are created, we can recreate relationships between shards.
|
||||
|
@ -868,15 +937,14 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
|
|||
{
|
||||
List *shardForeignConstraintCommandList = NIL;
|
||||
List *referenceTableForeignConstraintList = NIL;
|
||||
|
||||
char *tableOwner = TableOwner(shardInterval->relationId);
|
||||
List *commandList = NIL;
|
||||
|
||||
CopyShardForeignConstraintCommandListGrouped(shardInterval,
|
||||
&shardForeignConstraintCommandList,
|
||||
&referenceTableForeignConstraintList);
|
||||
|
||||
List *commandList = list_concat(shardForeignConstraintCommandList,
|
||||
referenceTableForeignConstraintList);
|
||||
commandList = list_concat(commandList, shardForeignConstraintCommandList);
|
||||
commandList = list_concat(commandList, referenceTableForeignConstraintList);
|
||||
|
||||
if (PartitionTable(shardInterval->relationId))
|
||||
{
|
||||
|
@ -886,8 +954,10 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
|
|||
commandList = lappend(commandList, attachPartitionCommand);
|
||||
}
|
||||
|
||||
char *tableOwner = TableOwner(shardInterval->relationId);
|
||||
SendCommandListToWorkerInSingleTransaction(targetNodeName, targetNodePort,
|
||||
tableOwner, commandList);
|
||||
|
||||
MemoryContextReset(localContext);
|
||||
}
|
||||
|
||||
|
@ -990,11 +1060,41 @@ EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 sourceNo
|
|||
targetNodeName,
|
||||
targetNodePort);
|
||||
if (targetPlacement != NULL)
|
||||
{
|
||||
if (targetPlacement->shardState == SHARD_STATE_TO_DELETE)
|
||||
{
|
||||
/*
|
||||
* Trigger deletion of orphaned shards and hope that this removes
|
||||
* the shard.
|
||||
*/
|
||||
DropOrphanedShardsInSeparateTransaction();
|
||||
shardPlacementList = ShardPlacementList(shardId);
|
||||
targetPlacement = SearchShardPlacementInList(shardPlacementList,
|
||||
targetNodeName,
|
||||
targetNodePort);
|
||||
|
||||
/*
|
||||
* If it still doesn't remove the shard, then we error.
|
||||
*/
|
||||
if (targetPlacement != NULL)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("shard " INT64_FORMAT " already exists in the target node",
|
||||
errmsg(
|
||||
"shard " INT64_FORMAT
|
||||
" still exists on the target node as an orphaned shard",
|
||||
shardId),
|
||||
errdetail(
|
||||
"The existing shard is orphaned, but could not be deleted because there are still active queries on it")));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg(
|
||||
"shard " INT64_FORMAT " already exists in the target node",
|
||||
shardId)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1079,7 +1179,9 @@ CopyShardCommandList(ShardInterval *shardInterval, const char *sourceNodeName,
|
|||
copyShardDataCommand->data);
|
||||
}
|
||||
|
||||
List *indexCommandList = GetPostLoadTableCreationCommands(relationId, true);
|
||||
bool includeReplicaIdentity = true;
|
||||
List *indexCommandList =
|
||||
GetPostLoadTableCreationCommands(relationId, true, includeReplicaIdentity);
|
||||
indexCommandList = WorkerApplyShardDDLCommandList(indexCommandList, shardId);
|
||||
|
||||
copyShardToNodeCommandsList = list_concat(copyShardToNodeCommandsList,
|
||||
|
|
|
@ -12,59 +12,109 @@
|
|||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/xact.h"
|
||||
#include "postmaster/postmaster.h"
|
||||
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/shard_cleaner.h"
|
||||
#include "distributed/shard_rebalancer.h"
|
||||
#include "distributed/remote_commands.h"
|
||||
#include "distributed/resource_lock.h"
|
||||
#include "distributed/worker_transaction.h"
|
||||
|
||||
|
||||
/* declarations for dynamic loading */
|
||||
PG_FUNCTION_INFO_V1(master_defer_delete_shards);
|
||||
PG_FUNCTION_INFO_V1(citus_cleanup_orphaned_shards);
|
||||
PG_FUNCTION_INFO_V1(isolation_cleanup_orphaned_shards);
|
||||
|
||||
static bool TryDropShard(GroupShardPlacement *placement);
|
||||
static bool TryLockRelationAndPlacementCleanup(Oid relationId, LOCKMODE lockmode);
|
||||
|
||||
|
||||
/*
|
||||
* master_defer_delete_shards implements a user-facing UDF to delete orphaned shards that
|
||||
* are still hanging around in the system. These shards are orphaned by previous actions
|
||||
* that were not directly able to delete the placements eg. shard moving or dropping of a
|
||||
* distributed table while one of the data nodes was not online.
|
||||
* citus_cleanup_orphaned_shards implements a user-facing UDF to delete
|
||||
* orphaned shards that are still hanging around in the system. These shards are
|
||||
* orphaned by previous actions that were not directly able to delete the
|
||||
* placements eg. shard moving or dropping of a distributed table while one of
|
||||
* the data nodes was not online.
|
||||
*
|
||||
* This function iterates through placements where shardstate is SHARD_STATE_TO_DELETE
|
||||
* (shardstate = 4), drops the corresponding tables from the node and removes the
|
||||
* placement information from the catalog.
|
||||
* This function iterates through placements where shardstate is
|
||||
* SHARD_STATE_TO_DELETE (shardstate = 4), drops the corresponding tables from
|
||||
* the node and removes the placement information from the catalog.
|
||||
*
|
||||
* The function takes no arguments and runs cluster wide
|
||||
* The function takes no arguments and runs cluster wide. It cannot be run in a
|
||||
* transaction, because holding the locks it takes for a long time is not good.
|
||||
* While the locks are held, it is impossible for the background daemon to
|
||||
* cleanup orphaned shards.
|
||||
*/
|
||||
Datum
|
||||
master_defer_delete_shards(PG_FUNCTION_ARGS)
|
||||
citus_cleanup_orphaned_shards(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
PreventInTransactionBlock(true, "citus_cleanup_orphaned_shards");
|
||||
|
||||
bool waitForCleanupLock = true;
|
||||
int droppedShardCount = DropMarkedShards(waitForCleanupLock);
|
||||
bool waitForLocks = true;
|
||||
int droppedShardCount = DropOrphanedShards(waitForLocks);
|
||||
if (droppedShardCount > 0)
|
||||
{
|
||||
ereport(NOTICE, (errmsg("cleaned up %d orphaned shards", droppedShardCount)));
|
||||
}
|
||||
|
||||
PG_RETURN_INT32(droppedShardCount);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TryDropMarkedShards is a wrapper around DropMarkedShards that catches
|
||||
* isolation_cleanup_orphaned_shards implements a test UDF that's the same as
|
||||
* citus_cleanup_orphaned_shards. The only difference is that this command can
|
||||
* be run in transactions, which is needed to exercise it from isolation tests.
|
||||
*/
|
||||
Datum
|
||||
isolation_cleanup_orphaned_shards(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
bool waitForLocks = true;
|
||||
int droppedShardCount = DropOrphanedShards(waitForLocks);
|
||||
if (droppedShardCount > 0)
|
||||
{
|
||||
ereport(NOTICE, (errmsg("cleaned up %d orphaned shards", droppedShardCount)));
|
||||
}
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DropOrphanedShardsInSeparateTransaction cleans up orphaned shards by
|
||||
* connecting to localhost. This is done, so that the locks that
|
||||
* DropOrphanedShards takes are only held for a short time.
|
||||
*/
|
||||
void
|
||||
DropOrphanedShardsInSeparateTransaction(void)
|
||||
{
|
||||
ExecuteCriticalCommandInSeparateTransaction("CALL citus_cleanup_orphaned_shards()");
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TryDropOrphanedShards is a wrapper around DropOrphanedShards that catches
|
||||
* any errors to make it safe to use in the maintenance daemon.
|
||||
*
|
||||
* If dropping any of the shards failed this function returns -1, otherwise it
|
||||
* returns the number of dropped shards.
|
||||
*/
|
||||
int
|
||||
TryDropMarkedShards(bool waitForCleanupLock)
|
||||
TryDropOrphanedShards(bool waitForLocks)
|
||||
{
|
||||
int droppedShardCount = 0;
|
||||
MemoryContext savedContext = CurrentMemoryContext;
|
||||
PG_TRY();
|
||||
{
|
||||
droppedShardCount = DropMarkedShards(waitForCleanupLock);
|
||||
droppedShardCount = DropOrphanedShards(waitForLocks);
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
|
@ -83,7 +133,7 @@ TryDropMarkedShards(bool waitForCleanupLock)
|
|||
|
||||
|
||||
/*
|
||||
* DropMarkedShards removes shards that were marked SHARD_STATE_TO_DELETE before.
|
||||
* DropOrphanedShards removes shards that were marked SHARD_STATE_TO_DELETE before.
|
||||
*
|
||||
* It does so by trying to take an exclusive lock on the shard and its
|
||||
* colocated placements before removing. If the lock cannot be obtained it
|
||||
|
@ -91,33 +141,46 @@ TryDropMarkedShards(bool waitForCleanupLock)
|
|||
* will be removed at a later time when there are no locks held anymore on
|
||||
* those placements.
|
||||
*
|
||||
* If waitForLocks is false, then if we cannot take a lock on pg_dist_placement
|
||||
* we continue without waiting.
|
||||
*
|
||||
* Before doing any of this it will take an exclusive PlacementCleanup lock.
|
||||
* This is to ensure that this function is not being run concurrently.
|
||||
* Otherwise really bad race conditions are possible, such as removing all
|
||||
* placements of a shard. waitForCleanupLock indicates if this function should
|
||||
* wait for this lock or error out.
|
||||
* placements of a shard. waitForLocks indicates if this function should
|
||||
* wait for this lock or not.
|
||||
*
|
||||
*/
|
||||
int
|
||||
DropMarkedShards(bool waitForCleanupLock)
|
||||
DropOrphanedShards(bool waitForLocks)
|
||||
{
|
||||
int removedShardCount = 0;
|
||||
ListCell *shardPlacementCell = NULL;
|
||||
|
||||
/*
|
||||
* We should try to take the highest lock that we take
|
||||
* later in this function for pg_dist_placement. We take RowExclusiveLock
|
||||
* in DeleteShardPlacementRow.
|
||||
*/
|
||||
LOCKMODE lockmode = RowExclusiveLock;
|
||||
|
||||
if (!IsCoordinator())
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (waitForCleanupLock)
|
||||
if (waitForLocks)
|
||||
{
|
||||
LockPlacementCleanup();
|
||||
}
|
||||
else if (!TryLockPlacementCleanup())
|
||||
else
|
||||
{
|
||||
Oid distPlacementId = DistPlacementRelationId();
|
||||
if (!TryLockRelationAndPlacementCleanup(distPlacementId, lockmode))
|
||||
{
|
||||
ereport(WARNING, (errmsg("could not acquire lock to cleanup placements")));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int failedShardDropCount = 0;
|
||||
List *shardPlacementList = AllShardPlacementsWithShardPlacementState(
|
||||
|
@ -145,7 +208,7 @@ DropMarkedShards(bool waitForCleanupLock)
|
|||
|
||||
if (failedShardDropCount > 0)
|
||||
{
|
||||
ereport(WARNING, (errmsg("Failed to drop %d old shards out of %d",
|
||||
ereport(WARNING, (errmsg("Failed to drop %d orphaned shards out of %d",
|
||||
failedShardDropCount, list_length(shardPlacementList))));
|
||||
}
|
||||
|
||||
|
@ -153,10 +216,33 @@ DropMarkedShards(bool waitForCleanupLock)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* TryLockRelationAndPlacementCleanup tries to lock the given relation
|
||||
* and the placement cleanup. If it cannot, it returns false.
|
||||
*
|
||||
*/
|
||||
static bool
|
||||
TryLockRelationAndPlacementCleanup(Oid relationId, LOCKMODE lockmode)
|
||||
{
|
||||
if (!ConditionalLockRelationOid(relationId, lockmode))
|
||||
{
|
||||
ereport(DEBUG1, (errmsg(
|
||||
"could not acquire shard lock to cleanup placements")));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!TryLockPlacementCleanup())
|
||||
{
|
||||
ereport(DEBUG1, (errmsg("could not acquire lock to cleanup placements")));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TryDropShard tries to drop the given shard placement and returns
|
||||
* true on success. On failure, this method swallows errors and emits them
|
||||
* as WARNINGs.
|
||||
* true on success.
|
||||
*/
|
||||
static bool
|
||||
TryDropShard(GroupShardPlacement *placement)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -61,10 +61,10 @@ isolate_tenant_to_new_shard(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
worker_hash(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum valueDatum = PG_GETARG_DATUM(0);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Datum valueDatum = PG_GETARG_DATUM(0);
|
||||
|
||||
/* figure out hash function from the data type */
|
||||
Oid valueDataType = get_fn_expr_argtype(fcinfo->flinfo, 0);
|
||||
TypeCacheEntry *typeEntry = lookup_type_cache(valueDataType,
|
||||
|
|
|
@ -94,6 +94,8 @@ PG_FUNCTION_INFO_V1(citus_update_table_statistics);
|
|||
Datum
|
||||
master_create_empty_shard(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *relationNameText = PG_GETARG_TEXT_P(0);
|
||||
char *relationName = text_to_cstring(relationNameText);
|
||||
uint32 attemptableNodeCount = 0;
|
||||
|
@ -108,8 +110,6 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
|
|||
Oid relationId = ResolveRelationId(relationNameText, false);
|
||||
char relationKind = get_rel_relkind(relationId);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
EnsureTablePermissions(relationId, ACL_INSERT);
|
||||
CheckDistributedTable(relationId);
|
||||
|
||||
|
@ -171,10 +171,6 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
|
|||
"on local tables")));
|
||||
}
|
||||
|
||||
char replicationModel = TableReplicationModel(relationId);
|
||||
|
||||
EnsureReplicationSettings(relationId, replicationModel);
|
||||
|
||||
/* generate new and unique shardId from sequence */
|
||||
uint64 shardId = GetNextShardId();
|
||||
|
||||
|
@ -243,6 +239,8 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
master_append_table_to_shard(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
uint64 shardId = PG_GETARG_INT64(0);
|
||||
text *sourceTableNameText = PG_GETARG_TEXT_P(1);
|
||||
text *sourceNodeNameText = PG_GETARG_TEXT_P(2);
|
||||
|
@ -253,8 +251,6 @@ master_append_table_to_shard(PG_FUNCTION_ARGS)
|
|||
|
||||
float4 shardFillLevel = 0.0;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
ShardInterval *shardInterval = LoadShardInterval(shardId);
|
||||
Oid relationId = shardInterval->relationId;
|
||||
|
||||
|
@ -363,10 +359,10 @@ master_append_table_to_shard(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_update_shard_statistics(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int64 shardId = PG_GETARG_INT64(0);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
int64 shardId = PG_GETARG_INT64(0);
|
||||
|
||||
uint64 shardSize = UpdateShardStatistics(shardId);
|
||||
|
||||
PG_RETURN_INT64(shardSize);
|
||||
|
@ -380,10 +376,10 @@ citus_update_shard_statistics(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
citus_update_table_statistics(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid distributedTableId = PG_GETARG_OID(0);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid distributedTableId = PG_GETARG_OID(0);
|
||||
|
||||
UpdateTableStatistics(distributedTableId);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
|
|
|
@ -59,6 +59,7 @@ RebuildQueryStrings(Job *workerJob)
|
|||
Query *originalQuery = workerJob->jobQuery;
|
||||
List *taskList = workerJob->taskList;
|
||||
Task *task = NULL;
|
||||
bool isSingleTask = list_length(taskList) == 1;
|
||||
|
||||
if (originalQuery->commandType == CMD_INSERT)
|
||||
{
|
||||
|
@ -74,7 +75,7 @@ RebuildQueryStrings(Job *workerJob)
|
|||
* task, we scribble on the original query to avoid the copying
|
||||
* overhead.
|
||||
*/
|
||||
if (list_length(taskList) > 1)
|
||||
if (!isSingleTask)
|
||||
{
|
||||
query = copyObject(originalQuery);
|
||||
}
|
||||
|
@ -119,6 +120,19 @@ RebuildQueryStrings(Job *workerJob)
|
|||
* deparse_shard_query when the string is needed
|
||||
*/
|
||||
task->anchorDistributedTableId = modifiedRelationRTE->relid;
|
||||
|
||||
/*
|
||||
* For multi-row inserts, we modify the VALUES before storing the
|
||||
* query in the task.
|
||||
*/
|
||||
RangeTblEntry *valuesRTE = ExtractDistributedInsertValuesRTE(query);
|
||||
if (valuesRTE != NULL)
|
||||
{
|
||||
Assert(valuesRTE->rtekind == RTE_VALUES);
|
||||
Assert(task->rowValuesLists != NULL);
|
||||
|
||||
valuesRTE->values_lists = task->rowValuesLists;
|
||||
}
|
||||
}
|
||||
|
||||
bool isQueryObjectOrText = GetTaskQueryType(task) == TASK_QUERY_TEXT ||
|
||||
|
@ -180,39 +194,7 @@ AddInsertAliasIfNeeded(Query *query)
|
|||
static void
|
||||
UpdateTaskQueryString(Query *query, Task *task)
|
||||
{
|
||||
List *oldValuesLists = NIL;
|
||||
RangeTblEntry *valuesRTE = NULL;
|
||||
|
||||
if (query->commandType == CMD_INSERT)
|
||||
{
|
||||
/* extract the VALUES from the INSERT */
|
||||
valuesRTE = ExtractDistributedInsertValuesRTE(query);
|
||||
|
||||
if (valuesRTE != NULL)
|
||||
{
|
||||
Assert(valuesRTE->rtekind == RTE_VALUES);
|
||||
Assert(task->rowValuesLists != NULL);
|
||||
|
||||
oldValuesLists = valuesRTE->values_lists;
|
||||
valuesRTE->values_lists = task->rowValuesLists;
|
||||
}
|
||||
|
||||
if (ShouldLazyDeparseQuery(task))
|
||||
{
|
||||
/*
|
||||
* not all insert queries are copied before calling this
|
||||
* function, so we do it here
|
||||
*/
|
||||
query = copyObject(query);
|
||||
}
|
||||
}
|
||||
|
||||
SetTaskQueryIfShouldLazyDeparse(task, query);
|
||||
|
||||
if (valuesRTE != NULL)
|
||||
{
|
||||
valuesRTE->values_lists = oldValuesLists;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -94,6 +94,7 @@
|
|||
#include "distributed/multi_physical_planner.h"
|
||||
#include "distributed/multi_server_executor.h"
|
||||
#include "distributed/multi_router_planner.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/query_colocation_checker.h"
|
||||
#include "distributed/query_pushdown_planning.h"
|
||||
#include "distributed/recursive_planning.h"
|
||||
|
@ -179,7 +180,8 @@ static ConversionCandidates * CreateConversionCandidates(PlannerRestrictionConte
|
|||
plannerRestrictionContext,
|
||||
List *rangeTableList,
|
||||
int resultRTEIdentity);
|
||||
static void AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexes);
|
||||
static void AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexes,
|
||||
int flags);
|
||||
static ConversionChoice GetConversionChoice(ConversionCandidates *
|
||||
conversionCandidates,
|
||||
PlannerRestrictionContext *
|
||||
|
@ -403,7 +405,8 @@ HasConstantFilterOnUniqueColumn(RangeTblEntry *rangeTableEntry,
|
|||
FetchEqualityAttrNumsForRTE((Node *) restrictClauseList);
|
||||
|
||||
List *uniqueIndexColumnsList = ExecuteFunctionOnEachTableIndex(rangeTableEntry->relid,
|
||||
AppendUniqueIndexColumnsToList);
|
||||
AppendUniqueIndexColumnsToList,
|
||||
INCLUDE_INDEX_ALL_STATEMENTS);
|
||||
IndexColumns *indexColumns = NULL;
|
||||
foreach_ptr(indexColumns, uniqueIndexColumnsList)
|
||||
{
|
||||
|
@ -442,7 +445,8 @@ FirstIsSuperSetOfSecond(List *firstIntList, List *secondIntList)
|
|||
* unique index.
|
||||
*/
|
||||
static void
|
||||
AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexGroups)
|
||||
AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexGroups,
|
||||
int flags)
|
||||
{
|
||||
if (indexForm->indisunique || indexForm->indisprimary)
|
||||
{
|
||||
|
|
|
@ -139,6 +139,14 @@ GetCachedLocalPlan(Task *task, DistributedPlan *distributedPlan)
|
|||
bool
|
||||
IsLocalPlanCachingSupported(Job *currentJob, DistributedPlan *originalDistributedPlan)
|
||||
{
|
||||
if (originalDistributedPlan->numberOfTimesExecuted < 1)
|
||||
{
|
||||
/*
|
||||
* Only cache if a plan is being reused (via a prepared statement).
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!currentJob->deferredPruning)
|
||||
{
|
||||
/*
|
||||
|
|
|
@ -318,10 +318,6 @@ static Node * WorkerLimitCount(Node *limitCount, Node *limitOffset, OrderByLimit
|
|||
static List * WorkerSortClauseList(Node *limitCount,
|
||||
List *groupClauseList, List *sortClauseList,
|
||||
OrderByLimitReference orderByLimitReference);
|
||||
static List * GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
|
||||
List *sortClauseList,
|
||||
AttrNumber *targetProjectionNumber,
|
||||
Index *nextSortGroupRefIndex);
|
||||
static bool CanPushDownLimitApproximate(List *sortClauseList, List *targetList);
|
||||
static bool HasOrderByAggregate(List *sortClauseList, List *targetList);
|
||||
static bool HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList);
|
||||
|
@ -2701,38 +2697,6 @@ ProcessWindowFunctionsForWorkerQuery(List *windowClauseList,
|
|||
return;
|
||||
}
|
||||
|
||||
WindowClause *windowClause = NULL;
|
||||
foreach_ptr(windowClause, windowClauseList)
|
||||
{
|
||||
List *partitionClauseTargetList =
|
||||
GenerateNewTargetEntriesForSortClauses(originalTargetEntryList,
|
||||
windowClause->partitionClause,
|
||||
&(queryTargetList->
|
||||
targetProjectionNumber),
|
||||
queryWindowClause->
|
||||
nextSortGroupRefIndex);
|
||||
List *orderClauseTargetList =
|
||||
GenerateNewTargetEntriesForSortClauses(originalTargetEntryList,
|
||||
windowClause->orderClause,
|
||||
&(queryTargetList->
|
||||
targetProjectionNumber),
|
||||
queryWindowClause->
|
||||
nextSortGroupRefIndex);
|
||||
|
||||
/*
|
||||
* Note that even Citus does push down the window clauses as-is, we may still need to
|
||||
* add the generated entries to the target list. The reason is that the same aggregates
|
||||
* might be referred from another target entry that is a bare aggregate (e.g., no window
|
||||
* functions), which would have been mutated. For instance, when an average aggregate
|
||||
* is mutated on the target list, the window function would refer to a sum aggregate,
|
||||
* which is obviously wrong.
|
||||
*/
|
||||
queryTargetList->targetEntryList = list_concat(queryTargetList->targetEntryList,
|
||||
partitionClauseTargetList);
|
||||
queryTargetList->targetEntryList = list_concat(queryTargetList->targetEntryList,
|
||||
orderClauseTargetList);
|
||||
}
|
||||
|
||||
queryWindowClause->workerWindowClauseList = windowClauseList;
|
||||
queryWindowClause->hasWindowFunctions = true;
|
||||
}
|
||||
|
@ -2798,19 +2762,6 @@ ProcessLimitOrderByForWorkerQuery(OrderByLimitReference orderByLimitReference,
|
|||
groupClauseList,
|
||||
sortClauseList,
|
||||
orderByLimitReference);
|
||||
|
||||
/*
|
||||
* TODO: Do we really need to add the target entries if we're not pushing
|
||||
* down ORDER BY?
|
||||
*/
|
||||
List *newTargetEntryListForSortClauses =
|
||||
GenerateNewTargetEntriesForSortClauses(originalTargetList,
|
||||
queryOrderByLimit->workerSortClauseList,
|
||||
&(queryTargetList->targetProjectionNumber),
|
||||
queryOrderByLimit->nextSortGroupRefIndex);
|
||||
|
||||
queryTargetList->targetEntryList =
|
||||
list_concat(queryTargetList->targetEntryList, newTargetEntryListForSortClauses);
|
||||
}
|
||||
|
||||
|
||||
|
@ -4795,87 +4746,6 @@ WorkerSortClauseList(Node *limitCount, List *groupClauseList, List *sortClauseLi
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* GenerateNewTargetEntriesForSortClauses goes over provided sort clause lists and
|
||||
* creates new target entries if needed to make sure sort clauses has correct
|
||||
* references. The function returns list of new target entries, caller is
|
||||
* responsible to add those target entries to the end of worker target list.
|
||||
*
|
||||
* The function is required because we change the target entry if it contains an
|
||||
* expression having an aggregate operation, or just the AVG aggregate.
|
||||
* Afterwards any order by clause referring to original target entry starts
|
||||
* to point to a wrong expression.
|
||||
*
|
||||
* Note the function modifies SortGroupClause items in sortClauseList,
|
||||
* targetProjectionNumber, and nextSortGroupRefIndex.
|
||||
*/
|
||||
static List *
|
||||
GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
|
||||
List *sortClauseList,
|
||||
AttrNumber *targetProjectionNumber,
|
||||
Index *nextSortGroupRefIndex)
|
||||
{
|
||||
List *createdTargetList = NIL;
|
||||
|
||||
SortGroupClause *sgClause = NULL;
|
||||
foreach_ptr(sgClause, sortClauseList)
|
||||
{
|
||||
TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList);
|
||||
Expr *targetExpr = targetEntry->expr;
|
||||
bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0);
|
||||
bool createNewTargetEntry = false;
|
||||
|
||||
/* we are only interested in target entries containing aggregates */
|
||||
if (!containsAggregate)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the target expression is not an Aggref, it is either an expression
|
||||
* on a single aggregate, or expression containing multiple aggregates.
|
||||
* Worker query mutates these target entries to have a naked target entry
|
||||
* per aggregate function. We want to use original target entries if this
|
||||
* the case.
|
||||
* If the original target expression is an avg aggref, we also want to use
|
||||
* original target entry.
|
||||
*/
|
||||
if (!IsA(targetExpr, Aggref))
|
||||
{
|
||||
createNewTargetEntry = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
Aggref *aggNode = (Aggref *) targetExpr;
|
||||
AggregateType aggregateType = GetAggregateType(aggNode);
|
||||
if (aggregateType == AGGREGATE_AVERAGE)
|
||||
{
|
||||
createNewTargetEntry = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (createNewTargetEntry)
|
||||
{
|
||||
bool resJunk = true;
|
||||
AttrNumber nextResNo = (*targetProjectionNumber);
|
||||
Expr *newExpr = copyObject(targetExpr);
|
||||
TargetEntry *newTargetEntry = makeTargetEntry(newExpr, nextResNo,
|
||||
targetEntry->resname, resJunk);
|
||||
newTargetEntry->ressortgroupref = *nextSortGroupRefIndex;
|
||||
|
||||
createdTargetList = lappend(createdTargetList, newTargetEntry);
|
||||
|
||||
sgClause->tleSortGroupRef = *nextSortGroupRefIndex;
|
||||
|
||||
(*nextSortGroupRefIndex)++;
|
||||
(*targetProjectionNumber)++;
|
||||
}
|
||||
}
|
||||
|
||||
return createdTargetList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CanPushDownLimitApproximate checks if we can push down the limit clause to
|
||||
* the worker nodes, and get approximate and meaningful results. We can do this
|
||||
|
|
|
@ -2425,7 +2425,7 @@ CreateLocalDummyPlacement()
|
|||
{
|
||||
ShardPlacement *dummyPlacement = CitusMakeNode(ShardPlacement);
|
||||
dummyPlacement->nodeId = LOCAL_NODE_ID;
|
||||
dummyPlacement->nodeName = LOCAL_HOST_NAME;
|
||||
dummyPlacement->nodeName = LocalHostName;
|
||||
dummyPlacement->nodePort = PostPortNumber;
|
||||
dummyPlacement->groupId = GetLocalGroupId();
|
||||
return dummyPlacement;
|
||||
|
|
|
@ -901,13 +901,13 @@ AppendShardIdToName(char **name, uint64 shardId)
|
|||
Datum
|
||||
shard_name(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
int64 shardId = PG_GETARG_INT64(1);
|
||||
|
||||
char *qualifiedName = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (shardId <= 0)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
|
|
|
@ -68,6 +68,7 @@
|
|||
#include "distributed/time_constants.h"
|
||||
#include "distributed/query_stats.h"
|
||||
#include "distributed/remote_commands.h"
|
||||
#include "distributed/shard_rebalancer.h"
|
||||
#include "distributed/shared_library_init.h"
|
||||
#include "distributed/statistics_collection.h"
|
||||
#include "distributed/subplan_execution.h"
|
||||
|
@ -98,6 +99,9 @@ PG_MODULE_MAGIC;
|
|||
#define DUMMY_REAL_TIME_EXECUTOR_ENUM_VALUE 9999999
|
||||
static char *CitusVersion = CITUS_VERSION;
|
||||
|
||||
/* deprecated GUC value that should not be used anywhere outside this file */
|
||||
static int ReplicationModel = REPLICATION_MODEL_STREAMING;
|
||||
|
||||
|
||||
void _PG_init(void);
|
||||
void _PG_fini(void);
|
||||
|
@ -114,6 +118,7 @@ static void RegisterCitusConfigVariables(void);
|
|||
static bool ErrorIfNotASuitableDeadlockFactor(double *newval, void **extra,
|
||||
GucSource source);
|
||||
static bool WarnIfDeprecatedExecutorUsed(int *newval, void **extra, GucSource source);
|
||||
static bool WarnIfReplicationModelIsSet(int *newval, void **extra, GucSource source);
|
||||
static bool NoticeIfSubqueryPushdownEnabled(bool *newval, void **extra, GucSource source);
|
||||
static bool NodeConninfoGucCheckHook(char **newval, void **extra, GucSource source);
|
||||
static void NodeConninfoGucAssignHook(const char *newval, void *extra);
|
||||
|
@ -575,6 +580,17 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.check_available_space_before_move",
|
||||
gettext_noop("When enabled will check free disk space before a shard move"),
|
||||
gettext_noop(
|
||||
"Free disk space will be checked when this setting is enabled before each shard move."),
|
||||
&CheckAvailableSpaceBeforeMove,
|
||||
true,
|
||||
PGC_USERSET,
|
||||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomStringVariable(
|
||||
"citus.cluster_name",
|
||||
gettext_noop("Which cluster this node is a part of"),
|
||||
|
@ -629,7 +645,9 @@ RegisterCitusConfigVariables(void)
|
|||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.defer_drop_after_shard_move",
|
||||
gettext_noop("When enabled a shard move will mark old shards for deletion"),
|
||||
gettext_noop("When enabled a shard move will mark the original shards "
|
||||
"for deletion after a successful move, instead of deleting "
|
||||
"them right away."),
|
||||
gettext_noop("The deletion of a shard can sometimes run into a conflict with a "
|
||||
"long running transactions on a the shard during the drop phase of "
|
||||
"the shard move. This causes some moves to be rolled back after "
|
||||
|
@ -639,7 +657,7 @@ RegisterCitusConfigVariables(void)
|
|||
"citus.defer_shard_delete_interval to make sure defered deletions "
|
||||
"will be executed"),
|
||||
&DeferShardDeleteOnMove,
|
||||
false,
|
||||
true,
|
||||
PGC_USERSET,
|
||||
0,
|
||||
NULL, NULL, NULL);
|
||||
|
@ -654,11 +672,37 @@ RegisterCitusConfigVariables(void)
|
|||
"the background worker moves on. When set to -1 this background "
|
||||
"process is skipped."),
|
||||
&DeferShardDeleteInterval,
|
||||
-1, -1, 7 * 24 * 3600 * 1000,
|
||||
15000, -1, 7 * 24 * 3600 * 1000,
|
||||
PGC_SIGHUP,
|
||||
GUC_UNIT_MS,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomRealVariable(
|
||||
"citus.desired_percent_disk_available_after_move",
|
||||
gettext_noop(
|
||||
"Sets how many percentage of free disk space should be after a shard move"),
|
||||
gettext_noop(
|
||||
"This setting controls how much free space should be available after a shard move."
|
||||
"If the free disk space will be lower than this parameter, then shard move will result in"
|
||||
"an error."),
|
||||
&DesiredPercentFreeAfterMove,
|
||||
10.0, 0.0, 100.0,
|
||||
PGC_SIGHUP,
|
||||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.enable_manual_changes_to_shards",
|
||||
gettext_noop("Enables dropping and truncating known shards."),
|
||||
gettext_noop("Set to false by default. If set to true, enables "
|
||||
"dropping and truncating shards on the coordinator "
|
||||
"(or the workers with metadata)"),
|
||||
&EnableManualChangesToShards,
|
||||
false,
|
||||
PGC_USERSET,
|
||||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomRealVariable(
|
||||
"citus.distributed_deadlock_detection_factor",
|
||||
gettext_noop("Sets the time to wait before checking for distributed "
|
||||
|
@ -693,6 +737,17 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.enable_alter_database_owner",
|
||||
gettext_noop("Enables propagating ALTER DATABASE ... OWNER TO ... statements to "
|
||||
"workers"),
|
||||
NULL,
|
||||
&EnableAlterDatabaseOwner,
|
||||
false,
|
||||
PGC_USERSET,
|
||||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.enable_binary_protocol",
|
||||
gettext_noop(
|
||||
|
@ -940,30 +995,17 @@ RegisterCitusConfigVariables(void)
|
|||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.check_available_space_before_move",
|
||||
gettext_noop("When enabled will check free disk space before a shard move"),
|
||||
gettext_noop(
|
||||
"Free disk space will be checked when this setting is enabled before each shard move."),
|
||||
&CheckAvailableSpaceBeforeMove,
|
||||
"citus.enable_cost_based_connection_establishment",
|
||||
gettext_noop("When enabled the connection establishment times "
|
||||
"and task execution times into account for deciding "
|
||||
"whether or not to establish new connections."),
|
||||
NULL,
|
||||
&EnableCostBasedConnectionEstablishment,
|
||||
true,
|
||||
PGC_USERSET,
|
||||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomRealVariable(
|
||||
"citus.desired_percent_disk_available_after_move",
|
||||
gettext_noop(
|
||||
"Sets how many percentage of free disk space should be after a shard move"),
|
||||
gettext_noop(
|
||||
"This setting controls how much free space should be available after a shard move."
|
||||
"If the free disk space will be lower than this parameter, then shard move will result in"
|
||||
"an error."),
|
||||
&DesiredPercentFreeAfterMove,
|
||||
10.0, 0.0, 100.0,
|
||||
PGC_SIGHUP,
|
||||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.explain_distributed_queries",
|
||||
gettext_noop("Enables Explain for distributed queries."),
|
||||
|
@ -1051,6 +1093,19 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomStringVariable(
|
||||
"citus.local_hostname",
|
||||
gettext_noop("Sets the hostname when connecting back to itself."),
|
||||
gettext_noop("For some operations nodes, mostly the coordinator, connect back to "
|
||||
"itself. When configuring SSL certificates it sometimes is required "
|
||||
"to use a specific hostname to match the CN of the certificate when "
|
||||
"verify-full is used."),
|
||||
&LocalHostName,
|
||||
"localhost",
|
||||
PGC_SUSET,
|
||||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"citus.local_shared_pool_size",
|
||||
gettext_noop(
|
||||
|
@ -1190,6 +1245,16 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_UNIT_KB | GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"citus.max_rebalancer_logged_ignored_moves",
|
||||
gettext_noop("Sets the maximum number of ignored moves the rebalance logs"),
|
||||
NULL,
|
||||
&MaxRebalancerLoggedIgnoredMoves,
|
||||
5, -1, INT_MAX,
|
||||
PGC_USERSET,
|
||||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"citus.max_shared_pool_size",
|
||||
gettext_noop("Sets the maximum number of connections allowed per worker node "
|
||||
|
@ -1364,6 +1429,21 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_UNIT_KB | GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.prevent_incomplete_connection_establishment",
|
||||
gettext_noop("When enabled, the executor waits until all the connections "
|
||||
"are successfully established."),
|
||||
gettext_noop("Under some load, the executor may decide to establish some "
|
||||
"extra connections to further parallelize the execution. However,"
|
||||
"before the connection establishment is done, the execution might "
|
||||
"have already finished. When this GUC is set to true, the execution "
|
||||
"waits for such connections to be established."),
|
||||
&PreventIncompleteConnectionEstablishment,
|
||||
true,
|
||||
PGC_USERSET,
|
||||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomEnumVariable(
|
||||
"citus.propagate_set_commands",
|
||||
gettext_noop("Sets which SET commands are propagated to workers."),
|
||||
|
@ -1438,16 +1518,28 @@ RegisterCitusConfigVariables(void)
|
|||
|
||||
DefineCustomEnumVariable(
|
||||
"citus.replication_model",
|
||||
gettext_noop("Sets the replication model to be used for distributed tables."),
|
||||
gettext_noop("Depending upon the execution environment, statement- or streaming-"
|
||||
"based replication modes may be employed. Though most Citus deploy-"
|
||||
"ments will simply use statement replication, hosted and MX-style"
|
||||
"deployments should set this parameter to 'streaming'."),
|
||||
gettext_noop("Deprecated. Please use citus.shard_replication_factor instead"),
|
||||
gettext_noop(
|
||||
"Shard replication model is determined by the shard replication factor."
|
||||
"'statement' replication is used only when the replication factor is one."),
|
||||
&ReplicationModel,
|
||||
REPLICATION_MODEL_COORDINATOR,
|
||||
REPLICATION_MODEL_STREAMING,
|
||||
replication_model_options,
|
||||
PGC_SUSET,
|
||||
GUC_SUPERUSER_ONLY,
|
||||
GUC_NO_SHOW_ALL,
|
||||
WarnIfReplicationModelIsSet, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.running_under_isolation_test",
|
||||
gettext_noop(
|
||||
"Only useful for testing purposes, when set to true, Citus does some "
|
||||
"tricks to implement useful isolation tests with rebalancing. Should "
|
||||
"never be set to true on production systems "),
|
||||
gettext_noop("for details of the tricks implemented, refer to the source code"),
|
||||
&RunningUnderIsolationTest,
|
||||
false,
|
||||
PGC_SUSET,
|
||||
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
|
@ -1741,6 +1833,32 @@ NoticeIfSubqueryPushdownEnabled(bool *newval, void **extra, GucSource source)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* WarnIfReplicationModelIsSet emits a deprecation NOTICE when a user sets
|
||||
* citus.replication_model.
|
||||
*/
|
||||
static bool
|
||||
WarnIfReplicationModelIsSet(int *newval, void **extra, GucSource source)
|
||||
{
|
||||
/* print a warning only when user sets the guc */
|
||||
if (source == PGC_S_SESSION)
|
||||
{
|
||||
ereport(NOTICE, (errcode(ERRCODE_WARNING_DEPRECATED_FEATURE),
|
||||
errmsg(
|
||||
"Setting citus.replication_model has no effect. Please use "
|
||||
"citus.shard_replication_factor instead."),
|
||||
errdetail(
|
||||
"Citus determines the replication model based on the "
|
||||
"replication factor and the replication models of the colocated "
|
||||
"shards. If a colocated table is present, the replication model "
|
||||
"is inherited. Otherwise 'streaming' replication is preferred if "
|
||||
"supported by the replication factor.")));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* NodeConninfoGucCheckHook ensures conninfo settings are in the expected form
|
||||
* and that the keywords of all non-null settings are on an allowlist devised to
|
||||
|
|
|
@ -1,9 +1,51 @@
|
|||
-- citus--10.0-3--10.1-1
|
||||
|
||||
-- add the current database to the distributed objects if not already in there.
|
||||
-- this is to reliably propagate some of the alter database commands that might be
|
||||
-- supported.
|
||||
INSERT INTO citus.pg_dist_object SELECT
|
||||
'pg_catalog.pg_database'::regclass::oid AS oid,
|
||||
(SELECT oid FROM pg_database WHERE datname = current_database()) as objid,
|
||||
0 as objsubid
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
#include "../../columnar/sql/columnar--10.0-3--10.1-1.sql"
|
||||
#include "udfs/create_distributed_table/10.1-1.sql";
|
||||
#include "udfs/worker_partitioned_relation_total_size/10.1-1.sql"
|
||||
#include "udfs/worker_partitioned_relation_size/10.1-1.sql"
|
||||
#include "udfs/worker_partitioned_table_size/10.1-1.sql"
|
||||
#include "udfs/citus_prepare_pg_upgrade/10.1-1.sql"
|
||||
#include "udfs/citus_finish_pg_upgrade/10.1-1.sql"
|
||||
#include "udfs/citus_local_disk_space_stats/10.1-1.sql"
|
||||
#include "udfs/get_rebalance_table_shards_plan/10.1-1.sql"
|
||||
#include "udfs/citus_add_rebalance_strategy/10.1-1.sql"
|
||||
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ADD COLUMN improvement_threshold float4 NOT NULL default 0;
|
||||
UPDATE pg_catalog.pg_dist_rebalance_strategy SET improvement_threshold = 0.5 WHERE name = 'by_disk_size';
|
||||
|
||||
#include "udfs/get_rebalance_progress/10.1-1.sql"
|
||||
|
||||
-- use streaming replication when replication factor = 1
|
||||
WITH replicated_shards AS (
|
||||
SELECT shardid
|
||||
FROM pg_dist_placement
|
||||
WHERE shardstate = 1 OR shardstate = 3
|
||||
GROUP BY shardid
|
||||
HAVING count(*) <> 1 ),
|
||||
replicated_relations AS (
|
||||
SELECT DISTINCT logicalrelid
|
||||
FROM pg_dist_shard
|
||||
JOIN replicated_shards
|
||||
USING (shardid)
|
||||
)
|
||||
UPDATE pg_dist_partition
|
||||
SET repmodel = 's'
|
||||
WHERE repmodel = 'c'
|
||||
AND partmethod = 'h'
|
||||
AND logicalrelid NOT IN (SELECT * FROM replicated_relations);
|
||||
#include "udfs/citus_shards/10.1-1.sql"
|
||||
|
||||
DROP TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger ON pg_catalog.pg_dist_rebalance_strategy;
|
||||
DROP FUNCTION citus_internal.pg_dist_rebalance_strategy_enterprise_check();
|
||||
|
||||
#include "udfs/citus_cleanup_orphaned_shards/10.1-1.sql"
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
-- citus--10.1-1--10.2-1
|
||||
|
||||
-- bump version to 10.2-1
|
||||
|
|
@ -83,24 +83,7 @@ DROP EXTENSION IF EXISTS shard_rebalancer;
|
|||
#include "udfs/get_rebalance_table_shards_plan/9.0-1.sql"
|
||||
#include "udfs/replicate_table_shards/9.0-1.sql"
|
||||
#include "udfs/rebalance_table_shards/9.0-1.sql"
|
||||
|
||||
-- get_rebalance_progress returns the list of shard placement move operations along with
|
||||
-- their progressions for ongoing rebalance operations.
|
||||
--
|
||||
CREATE OR REPLACE FUNCTION get_rebalance_progress()
|
||||
RETURNS TABLE(sessionid integer,
|
||||
table_name regclass,
|
||||
shardid bigint,
|
||||
shard_size bigint,
|
||||
sourcename text,
|
||||
sourceport int,
|
||||
targetname text,
|
||||
targetport int,
|
||||
progress bigint)
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
COMMENT ON FUNCTION get_rebalance_progress()
|
||||
IS 'provides progress information about the ongoing rebalance operations';
|
||||
#include "udfs/get_rebalance_progress/9.0-1.sql"
|
||||
|
||||
DROP FUNCTION master_add_node(text, integer, integer, noderole, name);
|
||||
CREATE FUNCTION master_add_node(nodename text,
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
-- citus--10.1-1--10.0-3
|
||||
|
||||
-- remove databases as distributed objects to prevent unknown object types being managed
|
||||
-- on older versions.
|
||||
DELETE FROM citus.pg_dist_object
|
||||
WHERE classid = 'pg_catalog.pg_database'::regclass::oid;
|
||||
|
||||
#include "../../../columnar/sql/downgrades/columnar--10.1-1--10.0-3.sql"
|
||||
|
||||
DROP FUNCTION pg_catalog.create_distributed_table(regclass, text, citus.distribution_type, text, int);
|
||||
|
@ -21,4 +26,62 @@ DROP FUNCTION pg_catalog.worker_partitioned_relation_size(regclass);
|
|||
DROP FUNCTION pg_catalog.worker_partitioned_table_size(regclass);
|
||||
DROP FUNCTION pg_catalog.citus_local_disk_space_stats();
|
||||
|
||||
#include "../udfs/citus_prepare_pg_upgrade/9.5-1.sql"
|
||||
#include "../udfs/citus_finish_pg_upgrade/10.0-1.sql"
|
||||
#include "../udfs/get_rebalance_table_shards_plan/9.2-1.sql"
|
||||
|
||||
-- the migration for citus_add_rebalance_strategy from 9.2-1 was the first one,
|
||||
-- so it doesn't have a DROP. This is why we DROP manually here.
|
||||
DROP FUNCTION pg_catalog.citus_add_rebalance_strategy;
|
||||
#include "../udfs/citus_add_rebalance_strategy/9.2-1.sql"
|
||||
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DROP COLUMN improvement_threshold;
|
||||
|
||||
-- the migration for get_rebalance_progress from 9.0-1 was the first one,
|
||||
-- so it doesn't have a DROP. This is why we DROP manually here.
|
||||
DROP FUNCTION pg_catalog.get_rebalance_progress;
|
||||
#include "../udfs/get_rebalance_progress/9.0-1.sql"
|
||||
|
||||
CREATE OR REPLACE VIEW pg_catalog.citus_shards AS
|
||||
WITH shard_sizes AS (SELECT * FROM pg_catalog.citus_shard_sizes())
|
||||
SELECT
|
||||
pg_dist_shard.logicalrelid AS table_name,
|
||||
pg_dist_shard.shardid,
|
||||
shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) as shard_name,
|
||||
CASE WHEN partkey IS NOT NULL THEN 'distributed' WHEN repmodel = 't' THEN 'reference' ELSE 'local' END AS citus_table_type,
|
||||
colocationid AS colocation_id,
|
||||
pg_dist_node.nodename,
|
||||
pg_dist_node.nodeport,
|
||||
(SELECT size FROM shard_sizes WHERE
|
||||
shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) = table_name
|
||||
OR
|
||||
'public.' || shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) = table_name
|
||||
LIMIT 1) as shard_size
|
||||
FROM
|
||||
pg_dist_shard
|
||||
JOIN
|
||||
pg_dist_placement
|
||||
ON
|
||||
pg_dist_shard.shardid = pg_dist_placement.shardid
|
||||
JOIN
|
||||
pg_dist_node
|
||||
ON
|
||||
pg_dist_placement.groupid = pg_dist_node.groupid
|
||||
JOIN
|
||||
pg_dist_partition
|
||||
ON
|
||||
pg_dist_partition.logicalrelid = pg_dist_shard.logicalrelid
|
||||
ORDER BY
|
||||
pg_dist_shard.logicalrelid::text, shardid
|
||||
;
|
||||
|
||||
#include "../udfs/citus_finish_pg_upgrade/10.0-1.sql"
|
||||
CREATE FUNCTION citus_internal.pg_dist_rebalance_strategy_enterprise_check()
|
||||
RETURNS TRIGGER
|
||||
LANGUAGE C
|
||||
AS 'MODULE_PATHNAME';
|
||||
CREATE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger
|
||||
BEFORE INSERT OR UPDATE OR DELETE OR TRUNCATE ON pg_dist_rebalance_strategy
|
||||
FOR EACH STATEMENT EXECUTE FUNCTION citus_internal.pg_dist_rebalance_strategy_enterprise_check();
|
||||
|
||||
DROP PROCEDURE pg_catalog.citus_cleanup_orphaned_shards();
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
-- citus--10.2-1--10.1-1
|
||||
-- this is an empty downgrade path since citus--10.1-1--10.2-1.sql is empty for now
|
|
@ -0,0 +1,30 @@
|
|||
-- citus_add_rebalance_strategy inserts a new row into
-- pg_catalog.pg_dist_rebalance_strategy.
--
-- The function is dropped first because this version adds a trailing
-- improvement_threshold parameter (default 0), which changes the signature.
--
-- improvement_threshold is stored in the column of the same name; previously
-- the parameter was accepted but silently discarded, so every strategy added
-- through this function ended up with the column default.
--
-- NOTE(review): the body references the improvement_threshold column, which the
-- 10.0-3--10.1-1 migration adds with ALTER TABLE. Confirm that the column
-- exists (or that function bodies are not validated) at the point where this
-- file is included in the migration script.
DROP FUNCTION pg_catalog.citus_add_rebalance_strategy;
CREATE OR REPLACE FUNCTION pg_catalog.citus_add_rebalance_strategy(
    name name,
    shard_cost_function regproc,
    node_capacity_function regproc,
    shard_allowed_on_node_function regproc,
    default_threshold float4,
    minimum_threshold float4 DEFAULT 0,
    improvement_threshold float4 DEFAULT 0
)
    RETURNS VOID AS $$
    INSERT INTO
        pg_catalog.pg_dist_rebalance_strategy(
            name,
            shard_cost_function,
            node_capacity_function,
            shard_allowed_on_node_function,
            default_threshold,
            minimum_threshold,
            improvement_threshold
        ) VALUES (
            name,
            shard_cost_function,
            node_capacity_function,
            shard_allowed_on_node_function,
            default_threshold,
            minimum_threshold,
            improvement_threshold
        );
$$ LANGUAGE sql;
COMMENT ON FUNCTION pg_catalog.citus_add_rebalance_strategy(name,regproc,regproc,regproc,float4, float4, float4)
  IS 'adds a new rebalance strategy which can be used when rebalancing shards or draining nodes';
|
|
@ -1,10 +1,12 @@
|
|||
DROP FUNCTION pg_catalog.citus_add_rebalance_strategy;
|
||||
CREATE OR REPLACE FUNCTION pg_catalog.citus_add_rebalance_strategy(
|
||||
name name,
|
||||
shard_cost_function regproc,
|
||||
node_capacity_function regproc,
|
||||
shard_allowed_on_node_function regproc,
|
||||
default_threshold float4,
|
||||
minimum_threshold float4 DEFAULT 0
|
||||
minimum_threshold float4 DEFAULT 0,
|
||||
improvement_threshold float4 DEFAULT 0
|
||||
)
|
||||
RETURNS VOID AS $$
|
||||
INSERT INTO
|
||||
|
@ -24,5 +26,5 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_add_rebalance_strategy(
|
|||
minimum_threshold
|
||||
);
|
||||
$$ LANGUAGE sql;
|
||||
COMMENT ON FUNCTION pg_catalog.citus_add_rebalance_strategy(name,regproc,regproc,regproc,float4, float4)
|
||||
COMMENT ON FUNCTION pg_catalog.citus_add_rebalance_strategy(name,regproc,regproc,regproc,float4, float4, float4)
|
||||
IS 'adds a new rebalance strategy which can be used when rebalancing shards or draining nodes';
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
-- citus_cleanup_orphaned_shards() is declared as an argument-less procedure
-- (not a function) implemented in C: the symbol citus_cleanup_orphaned_shards
-- is loaded from the 'citus' library.
CREATE OR REPLACE PROCEDURE pg_catalog.citus_cleanup_orphaned_shards()
|
||||
LANGUAGE C
|
||||
AS 'citus', $$citus_cleanup_orphaned_shards$$;
|
||||
COMMENT ON PROCEDURE pg_catalog.citus_cleanup_orphaned_shards()
|
||||
IS 'cleanup orphaned shards';
|
|
@ -0,0 +1,5 @@
|
|||
-- citus_cleanup_orphaned_shards() is declared as an argument-less procedure
-- (not a function) implemented in C: the symbol citus_cleanup_orphaned_shards
-- is loaded from the 'citus' library.
CREATE OR REPLACE PROCEDURE pg_catalog.citus_cleanup_orphaned_shards()
|
||||
LANGUAGE C
|
||||
AS 'citus', $$citus_cleanup_orphaned_shards$$;
|
||||
COMMENT ON PROCEDURE pg_catalog.citus_cleanup_orphaned_shards()
|
||||
IS 'cleanup orphaned shards';
|
|
@ -23,7 +23,6 @@ BEGIN
|
|||
INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
|
||||
INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;
|
||||
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
|
||||
INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
|
||||
name,
|
||||
default_strategy,
|
||||
|
@ -31,9 +30,9 @@ BEGIN
|
|||
node_capacity_function::regprocedure::regproc,
|
||||
shard_allowed_on_node_function::regprocedure::regproc,
|
||||
default_threshold,
|
||||
minimum_threshold
|
||||
minimum_threshold,
|
||||
improvement_threshold
|
||||
FROM public.pg_dist_rebalance_strategy;
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
|
||||
|
||||
--
|
||||
-- drop backup tables
|
||||
|
|
|
@ -23,7 +23,6 @@ BEGIN
|
|||
INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
|
||||
INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;
|
||||
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
|
||||
INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
|
||||
name,
|
||||
default_strategy,
|
||||
|
@ -31,9 +30,9 @@ BEGIN
|
|||
node_capacity_function::regprocedure::regproc,
|
||||
shard_allowed_on_node_function::regprocedure::regproc,
|
||||
default_threshold,
|
||||
minimum_threshold
|
||||
minimum_threshold,
|
||||
improvement_threshold
|
||||
FROM public.pg_dist_rebalance_strategy;
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
|
||||
|
||||
--
|
||||
-- drop backup tables
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
-- citus_prepare_pg_upgrade() snapshots the Citus catalog tables into plain
-- tables in the public schema. It first drops any backup tables left in
-- public, then recreates each one as a copy of its pg_catalog counterpart, and
-- finally records address-style identifiers on citus.pg_dist_object.
-- The function runs with search_path pinned to pg_catalog.
CREATE OR REPLACE FUNCTION pg_catalog.citus_prepare_pg_upgrade()
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
SET search_path = pg_catalog
|
||||
AS $cppu$
|
||||
BEGIN
|
||||
--
|
||||
-- Drop existing backup tables
|
||||
--
|
||||
DROP TABLE IF EXISTS public.pg_dist_partition;
|
||||
DROP TABLE IF EXISTS public.pg_dist_shard;
|
||||
DROP TABLE IF EXISTS public.pg_dist_placement;
|
||||
DROP TABLE IF EXISTS public.pg_dist_node_metadata;
|
||||
DROP TABLE IF EXISTS public.pg_dist_node;
|
||||
DROP TABLE IF EXISTS public.pg_dist_local_group;
|
||||
DROP TABLE IF EXISTS public.pg_dist_transaction;
|
||||
DROP TABLE IF EXISTS public.pg_dist_colocation;
|
||||
DROP TABLE IF EXISTS public.pg_dist_authinfo;
|
||||
DROP TABLE IF EXISTS public.pg_dist_poolinfo;
|
||||
DROP TABLE IF EXISTS public.pg_dist_rebalance_strategy;
|
||||
|
||||
--
|
||||
-- backup citus catalog tables
|
||||
--
|
||||
CREATE TABLE public.pg_dist_partition AS SELECT * FROM pg_catalog.pg_dist_partition;
|
||||
CREATE TABLE public.pg_dist_shard AS SELECT * FROM pg_catalog.pg_dist_shard;
|
||||
CREATE TABLE public.pg_dist_placement AS SELECT * FROM pg_catalog.pg_dist_placement;
|
||||
CREATE TABLE public.pg_dist_node_metadata AS SELECT * FROM pg_catalog.pg_dist_node_metadata;
|
||||
CREATE TABLE public.pg_dist_node AS SELECT * FROM pg_catalog.pg_dist_node;
|
||||
CREATE TABLE public.pg_dist_local_group AS SELECT * FROM pg_catalog.pg_dist_local_group;
|
||||
CREATE TABLE public.pg_dist_transaction AS SELECT * FROM pg_catalog.pg_dist_transaction;
|
||||
CREATE TABLE public.pg_dist_colocation AS SELECT * FROM pg_catalog.pg_dist_colocation;
|
||||
-- enterprise catalog tables
|
||||
CREATE TABLE public.pg_dist_authinfo AS SELECT * FROM pg_catalog.pg_dist_authinfo;
|
||||
CREATE TABLE public.pg_dist_poolinfo AS SELECT * FROM pg_catalog.pg_dist_poolinfo;
|
||||
-- the three function columns are saved as regprocedure text (name plus
-- argument types) instead of being copied as-is; presumably because function
-- OIDs are not stable across pg_upgrade -- TODO confirm.
-- improvement_threshold is included in the backup.
CREATE TABLE public.pg_dist_rebalance_strategy AS SELECT
|
||||
name,
|
||||
default_strategy,
|
||||
shard_cost_function::regprocedure::text,
|
||||
node_capacity_function::regprocedure::text,
|
||||
shard_allowed_on_node_function::regprocedure::text,
|
||||
default_threshold,
|
||||
minimum_threshold,
|
||||
improvement_threshold
|
||||
FROM pg_catalog.pg_dist_rebalance_strategy;
|
||||
|
||||
-- store upgrade stable identifiers on pg_dist_object catalog
|
||||
-- (type, object_names, object_args) come from pg_identify_object_as_address
-- applied to each row's (classid, objid, objsubid)
UPDATE citus.pg_dist_object
|
||||
SET (type, object_names, object_args) = (SELECT * FROM pg_identify_object_as_address(classid, objid, objsubid));
|
||||
END;
|
||||
$cppu$;
|
||||
|
||||
COMMENT ON FUNCTION pg_catalog.citus_prepare_pg_upgrade()
|
||||
IS 'perform tasks to copy citus settings to a location that could later be restored after pg_upgrade is done';
|
|
@ -40,7 +40,8 @@ BEGIN
|
|||
node_capacity_function::regprocedure::text,
|
||||
shard_allowed_on_node_function::regprocedure::text,
|
||||
default_threshold,
|
||||
minimum_threshold
|
||||
minimum_threshold,
|
||||
improvement_threshold
|
||||
FROM pg_catalog.pg_dist_rebalance_strategy;
|
||||
|
||||
-- store upgrade stable identifiers on pg_dist_object catalog
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
-- citus_shards lists one row per shard placement with its table, shard name,
-- table type, colocation id, hosting node and size.
--
-- Sizes come from citus_shard_sizes(): the numeric suffix of each reported
-- table_name is taken as the shard id, and the largest size reported for that
-- id is used. The suffix is cast to bigint (not int) because shardid is a
-- bigint; an int cast would fail with "integer out of range" for shard ids
-- above 2^31-1. The join is a LEFT JOIN, so placements without a reported size
-- still appear, with a NULL shard_size.
--
-- Only placements with shardstate = 1 are shown (presumably the active
-- placements -- TODO confirm against the shard state definitions).
CREATE OR REPLACE VIEW pg_catalog.citus_shards AS
SELECT
     pg_dist_shard.logicalrelid AS table_name,
     pg_dist_shard.shardid,
     shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) as shard_name,
     CASE WHEN partkey IS NOT NULL THEN 'distributed' WHEN repmodel = 't' THEN 'reference' ELSE 'local' END AS citus_table_type,
     colocationid AS colocation_id,
     pg_dist_node.nodename,
     pg_dist_node.nodeport,
     size as shard_size
FROM
   pg_dist_shard
JOIN
   pg_dist_placement
ON
   pg_dist_shard.shardid = pg_dist_placement.shardid
JOIN
   pg_dist_node
ON
   pg_dist_placement.groupid = pg_dist_node.groupid
JOIN
   pg_dist_partition
ON
   pg_dist_partition.logicalrelid = pg_dist_shard.logicalrelid
LEFT JOIN
   (SELECT (regexp_matches(table_name,'_(\d+)$'))[1]::bigint as shard_id, max(size) as size from citus_shard_sizes() GROUP BY shard_id) as shard_sizes
ON
    pg_dist_shard.shardid = shard_sizes.shard_id
WHERE
    pg_dist_placement.shardstate = 1
ORDER BY
   pg_dist_shard.logicalrelid::text, shardid
;

GRANT SELECT ON pg_catalog.citus_shards TO public;
|
|
@ -1,5 +1,4 @@
|
|||
CREATE OR REPLACE VIEW citus.citus_shards AS
|
||||
WITH shard_sizes AS (SELECT * FROM pg_catalog.citus_shard_sizes())
|
||||
CREATE OR REPLACE VIEW pg_catalog.citus_shards AS
|
||||
SELECT
|
||||
pg_dist_shard.logicalrelid AS table_name,
|
||||
pg_dist_shard.shardid,
|
||||
|
@ -8,11 +7,7 @@ SELECT
|
|||
colocationid AS colocation_id,
|
||||
pg_dist_node.nodename,
|
||||
pg_dist_node.nodeport,
|
||||
(SELECT size FROM shard_sizes WHERE
|
||||
shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) = table_name
|
||||
OR
|
||||
'public.' || shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) = table_name
|
||||
LIMIT 1) as shard_size
|
||||
size as shard_size
|
||||
FROM
|
||||
pg_dist_shard
|
||||
JOIN
|
||||
|
@ -27,9 +22,14 @@ JOIN
|
|||
pg_dist_partition
|
||||
ON
|
||||
pg_dist_partition.logicalrelid = pg_dist_shard.logicalrelid
|
||||
LEFT JOIN
|
||||
(SELECT (regexp_matches(table_name,'_(\d+)$'))[1]::int as shard_id, max(size) as size from citus_shard_sizes() GROUP BY shard_id) as shard_sizes
|
||||
ON
|
||||
pg_dist_shard.shardid = shard_sizes.shard_id
|
||||
WHERE
|
||||
pg_dist_placement.shardstate = 1
|
||||
ORDER BY
|
||||
pg_dist_shard.logicalrelid::text, shardid
|
||||
;
|
||||
|
||||
ALTER VIEW citus.citus_shards SET SCHEMA pg_catalog;
|
||||
GRANT SELECT ON pg_catalog.citus_shards TO public;
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
-- get_rebalance_progress returns one row per shard placement move of the
-- ongoing rebalance operations. This version appends source_shard_size and
-- target_shard_size to the result columns; the existing function is dropped
-- first because the set of result columns changes.
DROP FUNCTION pg_catalog.get_rebalance_progress();
|
||||
|
||||
CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress()
|
||||
RETURNS TABLE(sessionid integer,
|
||||
table_name regclass,
|
||||
shardid bigint,
|
||||
shard_size bigint,
|
||||
sourcename text,
|
||||
sourceport int,
|
||||
targetname text,
|
||||
targetport int,
|
||||
progress bigint,
|
||||
source_shard_size bigint,
|
||||
target_shard_size bigint)
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
COMMENT ON FUNCTION pg_catalog.get_rebalance_progress()
|
||||
IS 'provides progress information about the ongoing rebalance operations';
|
|
@ -0,0 +1,17 @@
|
|||
-- get_rebalance_progress returns the list of shard placement move operations along with
|
||||
-- their progress for ongoing rebalance operations.
|
||||
-- Each row carries the session id, the table and shard being moved, the shard
-- size, the source and target node name/port, and a progress value.
--
|
||||
CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress()
|
||||
RETURNS TABLE(sessionid integer,
|
||||
table_name regclass,
|
||||
shardid bigint,
|
||||
shard_size bigint,
|
||||
sourcename text,
|
||||
sourceport int,
|
||||
targetname text,
|
||||
targetport int,
|
||||
progress bigint)
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
COMMENT ON FUNCTION pg_catalog.get_rebalance_progress()
|
||||
IS 'provides progress information about the ongoing rebalance operations';
|
|
@ -0,0 +1,18 @@
|
|||
-- get_rebalance_progress returns one row per shard placement move of the
-- ongoing rebalance operations. This version appends source_shard_size and
-- target_shard_size to the result columns; the existing function is dropped
-- first because the set of result columns changes.
DROP FUNCTION pg_catalog.get_rebalance_progress();
|
||||
|
||||
CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress()
|
||||
RETURNS TABLE(sessionid integer,
|
||||
table_name regclass,
|
||||
shardid bigint,
|
||||
shard_size bigint,
|
||||
sourcename text,
|
||||
sourceport int,
|
||||
targetname text,
|
||||
targetport int,
|
||||
progress bigint,
|
||||
source_shard_size bigint,
|
||||
target_shard_size bigint)
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
COMMENT ON FUNCTION pg_catalog.get_rebalance_progress()
|
||||
IS 'provides progress information about the ongoing rebalance operations';
|
27
src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/10.1-1.sql
generated
Normal file
27
src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/10.1-1.sql
generated
Normal file
|
@ -0,0 +1,27 @@
|
|||
-- get_rebalance_table_shards_plan shows the shard moves that would be performed
|
||||
-- if a rebalance operation were run with the same arguments, which allows users
|
||||
-- to understand the impact of the change on the overall availability of the
|
||||
-- application and on network traffic.
|
||||
--
-- This version appends the improvement_threshold argument (default NULL), so
-- the previous function is dropped before the new signature is created.
--
|
||||
DROP FUNCTION pg_catalog.get_rebalance_table_shards_plan;
|
||||
CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_table_shards_plan(
|
||||
relation regclass default NULL,
|
||||
threshold float4 default NULL,
|
||||
max_shard_moves int default 1000000,
|
||||
excluded_shard_list bigint[] default '{}',
|
||||
drain_only boolean default false,
|
||||
rebalance_strategy name default NULL,
|
||||
improvement_threshold float4 DEFAULT NULL
|
||||
)
|
||||
RETURNS TABLE (table_name regclass,
|
||||
shardid bigint,
|
||||
shard_size bigint,
|
||||
sourcename text,
|
||||
sourceport int,
|
||||
targetname text,
|
||||
targetport int)
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C VOLATILE;
|
||||
COMMENT ON FUNCTION pg_catalog.get_rebalance_table_shards_plan(regclass, float4, int, bigint[], boolean, name, float4)
|
||||
IS 'returns the list of shard placement moves to be done on a rebalance operation';
|
||||
|
|
@ -10,7 +10,8 @@ CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_table_shards_plan(
|
|||
max_shard_moves int default 1000000,
|
||||
excluded_shard_list bigint[] default '{}',
|
||||
drain_only boolean default false,
|
||||
rebalance_strategy name default NULL
|
||||
rebalance_strategy name default NULL,
|
||||
improvement_threshold float4 DEFAULT NULL
|
||||
)
|
||||
RETURNS TABLE (table_name regclass,
|
||||
shardid bigint,
|
||||
|
@ -21,6 +22,6 @@ CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_table_shards_plan(
|
|||
targetport int)
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C VOLATILE;
|
||||
COMMENT ON FUNCTION pg_catalog.get_rebalance_table_shards_plan(regclass, float4, int, bigint[], boolean, name)
|
||||
COMMENT ON FUNCTION pg_catalog.get_rebalance_table_shards_plan(regclass, float4, int, bigint[], boolean, name, float4)
|
||||
IS 'returns the list of shard placement moves to be done on a rebalance operation';
|
||||
|
||||
|
|
|
@ -39,6 +39,8 @@ PG_FUNCTION_INFO_V1(get_adjacency_list_wait_graph);
|
|||
Datum
|
||||
get_adjacency_list_wait_graph(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
|
||||
HASH_SEQ_STATUS status;
|
||||
|
@ -47,8 +49,6 @@ get_adjacency_list_wait_graph(PG_FUNCTION_ARGS)
|
|||
Datum values[2];
|
||||
bool isNulls[2];
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
|
||||
WaitGraph *waitGraph = BuildGlobalWaitGraph();
|
||||
HTAB *adjacencyList = BuildAdjacencyListsForWaitGraph(waitGraph);
|
||||
|
|
|
@ -77,11 +77,11 @@ drop_constraint_cascade_via_perform_deletion(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
get_referencing_relation_id_list(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
FuncCallContext *functionContext = NULL;
|
||||
ListCell *foreignRelationCell = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* the first time this UDF is called, we need to populate the result set to return */
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
|
@ -136,11 +136,11 @@ get_referencing_relation_id_list(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
get_referenced_relation_id_list(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
FuncCallContext *functionContext = NULL;
|
||||
ListCell *foreignRelationCell = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* the first time this UDF is called, we need to populate the result set to return */
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
|
|
|
@ -30,11 +30,11 @@ PG_FUNCTION_INFO_V1(get_foreign_key_to_reference_table_commands);
|
|||
Datum
|
||||
get_foreign_key_to_reference_table_commands(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
FuncCallContext *functionContext = NULL;
|
||||
ListCell *commandsCell = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* the first time this UDF is called, we need to populate the result set to return */
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
|
|
|
@ -34,6 +34,8 @@ PG_FUNCTION_INFO_V1(store_intermediate_result_on_node);
|
|||
Datum
|
||||
store_intermediate_result_on_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *nodeNameText = PG_GETARG_TEXT_P(0);
|
||||
char *nodeNameString = text_to_cstring(nodeNameText);
|
||||
int nodePort = PG_GETARG_INT32(1);
|
||||
|
@ -44,8 +46,6 @@ store_intermediate_result_on_node(PG_FUNCTION_ARGS)
|
|||
bool writeLocalFile = false;
|
||||
ParamListInfo paramListInfo = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
WorkerNode *workerNode = FindWorkerNodeOrError(nodeNameString, nodePort);
|
||||
|
||||
/*
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include "catalog/pg_type.h"
|
||||
#include "distributed/connection_management.h"
|
||||
#include "distributed/intermediate_result_pruning.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/maintenanced.h"
|
||||
#include "distributed/metadata_sync.h"
|
||||
|
@ -104,7 +105,7 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS)
|
|||
}
|
||||
|
||||
MultiConnection *connection = GetNodeConnection(FORCE_NEW_CONNECTION,
|
||||
"localhost", PostPortNumber);
|
||||
LOCAL_HOST_NAME, PostPortNumber);
|
||||
ExecuteCriticalRemoteCommand(connection, "LISTEN " METADATA_SYNC_CHANNEL);
|
||||
|
||||
int waitFlags = WL_SOCKET_READABLE | WL_TIMEOUT | WL_POSTMASTER_DEATH;
|
||||
|
|
|
@ -17,10 +17,10 @@
|
|||
|
||||
#include "access/xact.h"
|
||||
#include "distributed/connection_management.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/function_utils.h"
|
||||
#include "distributed/intermediate_result_pruning.h"
|
||||
#include "distributed/lock_graph.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/remote_commands.h"
|
||||
#include "distributed/run_from_same_connection.h"
|
||||
|
@ -83,13 +83,13 @@ AllowNonIdleTransactionOnXactHandling(void)
|
|||
Datum
|
||||
start_session_level_connection_to_node(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
text *nodeName = PG_GETARG_TEXT_P(0);
|
||||
uint32 nodePort = PG_GETARG_UINT32(1);
|
||||
char *nodeNameString = text_to_cstring(nodeName);
|
||||
int connectionFlags = 0;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
if (singleConnection != NULL && (strcmp(singleConnection->hostname,
|
||||
nodeNameString) != 0 ||
|
||||
singleConnection->port != nodePort))
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "distributed/connection_management.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/multi_physical_planner.h"
|
||||
#include "distributed/shard_cleaner.h"
|
||||
#include "distributed/shard_rebalancer.h"
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
|
@ -50,6 +51,7 @@ static ShardCost GetShardCost(uint64 shardId, void *context);
|
|||
PG_FUNCTION_INFO_V1(shard_placement_rebalance_array);
|
||||
PG_FUNCTION_INFO_V1(shard_placement_replication_array);
|
||||
PG_FUNCTION_INFO_V1(worker_node_responsive);
|
||||
PG_FUNCTION_INFO_V1(run_try_drop_marked_shards);
|
||||
|
||||
typedef struct ShardPlacementTestInfo
|
||||
{
|
||||
|
@ -71,6 +73,17 @@ typedef struct RebalancePlanContext
|
|||
List *shardPlacementTestInfoList;
|
||||
} RebalancePlacementContext;
|
||||
|
||||
/*
|
||||
* run_try_drop_marked_shards is a wrapper to run TryDropOrphanedShards.
|
||||
*/
|
||||
Datum
|
||||
run_try_drop_marked_shards(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bool waitForLocks = false;
|
||||
TryDropOrphanedShards(waitForLocks);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* shard_placement_rebalance_array returns a list of operations which can make a
|
||||
|
@ -89,6 +102,7 @@ shard_placement_rebalance_array(PG_FUNCTION_ARGS)
|
|||
float threshold = PG_GETARG_FLOAT4(2);
|
||||
int32 maxShardMoves = PG_GETARG_INT32(3);
|
||||
bool drainOnly = PG_GETARG_BOOL(4);
|
||||
float utilizationImproventThreshold = PG_GETARG_FLOAT4(5);
|
||||
|
||||
List *workerNodeList = NIL;
|
||||
List *shardPlacementListList = NIL;
|
||||
|
@ -143,6 +157,7 @@ shard_placement_rebalance_array(PG_FUNCTION_ARGS)
|
|||
threshold,
|
||||
maxShardMoves,
|
||||
drainOnly,
|
||||
utilizationImproventThreshold,
|
||||
&rebalancePlanFunctions);
|
||||
ArrayType *placementUpdateJsonArray = PlacementUpdateListToJsonArray(
|
||||
placementUpdateList);
|
||||
|
|
|
@ -106,6 +106,8 @@ PG_FUNCTION_INFO_V1(get_all_active_transactions);
|
|||
Datum
|
||||
assign_distributed_transaction_id(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid userId = GetUserId();
|
||||
|
||||
/* prepare data before acquiring spinlock to protect against errors */
|
||||
|
@ -113,8 +115,6 @@ assign_distributed_transaction_id(PG_FUNCTION_ARGS)
|
|||
uint64 transactionNumber = PG_GETARG_INT64(1);
|
||||
TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(2);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* MyBackendData should always be available, just out of paranoia */
|
||||
if (!MyBackendData)
|
||||
{
|
||||
|
@ -166,14 +166,14 @@ assign_distributed_transaction_id(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
get_current_transaction_id(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
|
||||
Datum values[5];
|
||||
bool isNulls[5];
|
||||
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* build a tuple descriptor for our result type */
|
||||
if (get_call_result_type(fcinfo, NULL, &tupleDescriptor) != TYPEFUNC_COMPOSITE)
|
||||
{
|
||||
|
@ -225,12 +225,13 @@ get_current_transaction_id(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
get_global_active_transactions(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
List *workerNodeList = ActivePrimaryNonCoordinatorNodeList(NoLock);
|
||||
List *connectionList = NIL;
|
||||
StringInfo queryToSend = makeStringInfo();
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
|
||||
|
||||
appendStringInfo(queryToSend, GET_ACTIVE_TRANSACTION_QUERY);
|
||||
|
@ -336,9 +337,9 @@ get_global_active_transactions(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
get_all_active_transactions(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
|
||||
|
||||
StoreAllActiveTransactions(tupleStore, tupleDescriptor);
|
||||
|
|
|
@ -793,7 +793,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access
|
|||
"foreign keys. Any parallel modification to "
|
||||
"those hash distributed tables in the same "
|
||||
"transaction can only be executed in sequential query "
|
||||
"execution mode", relationName)));
|
||||
"execution mode",
|
||||
relationName != NULL ? relationName : "<dropped>")));
|
||||
|
||||
/*
|
||||
* Switching to sequential mode is admittedly confusing and, could be useless
|
||||
|
|
|
@ -101,7 +101,7 @@ MemoryContext CommitContext = NULL;
|
|||
* do 2PC on the remote connections that did a modification.
|
||||
*
|
||||
* As a variable name ShouldCoordinatedTransactionUse2PC could
|
||||
* be improved. We use CoordinatedTransactionShouldUse2PC() as the
|
||||
* be improved. We use Use2PCForCoordinatedTransaction() as the
|
||||
* public API function, hence couldn't come up with a better name
|
||||
* for the underlying variable at the moment.
|
||||
*/
|
||||
|
@ -190,14 +190,14 @@ InCoordinatedTransaction(void)
|
|||
|
||||
|
||||
/*
|
||||
* CoordinatedTransactionShouldUse2PC() signals that the current coordinated
|
||||
* Use2PCForCoordinatedTransaction() signals that the current coordinated
|
||||
* transaction should use 2PC to commit.
|
||||
*
|
||||
* Note that even if 2PC is enabled, it is only used for connections that make
|
||||
* modification (DML or DDL).
|
||||
*/
|
||||
void
|
||||
CoordinatedTransactionShouldUse2PC(void)
|
||||
Use2PCForCoordinatedTransaction(void)
|
||||
{
|
||||
Assert(InCoordinatedTransaction());
|
||||
|
||||
|
|
|
@ -96,7 +96,7 @@ SendCommandToWorkerAsUser(const char *nodeName, int32 nodePort, const char *node
|
|||
uint32 connectionFlags = 0;
|
||||
|
||||
UseCoordinatedTransaction();
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
Use2PCForCoordinatedTransaction();
|
||||
|
||||
MultiConnection *transactionConnection = GetNodeUserDatabaseConnection(
|
||||
connectionFlags, nodeName,
|
||||
|
@ -404,7 +404,7 @@ SendCommandToWorkersParamsInternal(TargetWorkerSet targetWorkerSet, const char *
|
|||
List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);
|
||||
|
||||
UseCoordinatedTransaction();
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
Use2PCForCoordinatedTransaction();
|
||||
|
||||
/* open connections in parallel */
|
||||
WorkerNode *workerNode = NULL;
|
||||
|
|
|
@ -135,6 +135,7 @@ CopyNodeDistributedPlan(COPYFUNC_ARGS)
|
|||
COPY_NODE_FIELD(subPlanList);
|
||||
COPY_NODE_FIELD(usedSubPlanNodeList);
|
||||
COPY_SCALAR_FIELD(fastPathRouterPlan);
|
||||
COPY_SCALAR_FIELD(numberOfTimesExecuted);
|
||||
COPY_NODE_FIELD(planningError);
|
||||
}
|
||||
|
||||
|
|
|
@ -198,6 +198,7 @@ OutDistributedPlan(OUTFUNC_ARGS)
|
|||
WRITE_NODE_FIELD(subPlanList);
|
||||
WRITE_NODE_FIELD(usedSubPlanNodeList);
|
||||
WRITE_BOOL_FIELD(fastPathRouterPlan);
|
||||
WRITE_UINT_FIELD(numberOfTimesExecuted);
|
||||
|
||||
WRITE_NODE_FIELD(planningError);
|
||||
}
|
||||
|
|
|
@ -70,6 +70,9 @@ PG_FUNCTION_INFO_V1(update_distributed_table_colocation);
|
|||
Datum
|
||||
mark_tables_colocated(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
Oid sourceRelationId = PG_GETARG_OID(0);
|
||||
ArrayType *relationIdArrayObject = PG_GETARG_ARRAYTYPE_P(1);
|
||||
|
||||
|
@ -80,8 +83,6 @@ mark_tables_colocated(PG_FUNCTION_ARGS)
|
|||
"operation")));
|
||||
}
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
EnsureTableOwner(sourceRelationId);
|
||||
|
||||
Datum *relationIdDatumArray = DeconstructArrayObject(relationIdArrayObject);
|
||||
|
@ -108,11 +109,12 @@ mark_tables_colocated(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
update_distributed_table_colocation(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
Oid targetRelationId = PG_GETARG_OID(0);
|
||||
text *colocateWithTableNameText = PG_GETARG_TEXT_P(1);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
EnsureTableOwner(targetRelationId);
|
||||
|
||||
char *colocateWithTableName = text_to_cstring(colocateWithTableNameText);
|
||||
|
|
|
@ -49,12 +49,12 @@ PG_FUNCTION_INFO_V1(column_to_column_name);
|
|||
Datum
|
||||
column_name_to_column(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
text *columnText = PG_GETARG_TEXT_P(1);
|
||||
char *columnName = text_to_cstring(columnText);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Relation relation = relation_open(relationId, AccessShareLock);
|
||||
|
||||
Var *column = BuildDistributionKeyFromColumnName(relation, columnName);
|
||||
|
@ -100,13 +100,13 @@ column_name_to_column_id(PG_FUNCTION_ARGS)
|
|||
Datum
|
||||
column_to_column_name(PG_FUNCTION_ARGS)
|
||||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
text *columnNodeText = PG_GETARG_TEXT_P(1);
|
||||
|
||||
char *columnNodeString = text_to_cstring(columnNodeText);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
char *columnName = ColumnToColumnName(relationId, columnNodeString);
|
||||
|
||||
text *columnText = cstring_to_text(columnName);
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#if PG_VERSION_NUM >= PG_VERSION_13
|
||||
#include "common/hashfn.h"
|
||||
#endif
|
||||
#include "utils/inval.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
|
@ -82,6 +83,7 @@ static ForeignConstraintRelationshipNode * GetRelationshipNodeForRelationId(Oid
|
|||
relationId,
|
||||
bool *isFound);
|
||||
static void CreateForeignConstraintRelationshipGraph(void);
|
||||
static bool IsForeignConstraintRelationshipGraphValid(void);
|
||||
static List * GetNeighbourList(ForeignConstraintRelationshipNode *relationshipNode,
|
||||
bool isReferencing);
|
||||
static List * GetRelationIdsFromRelationshipNodeList(List *fKeyRelationshipNodeList);
|
||||
|
@ -348,9 +350,15 @@ CreateForeignConstraintRelationshipGraph()
|
|||
/*
|
||||
* IsForeignConstraintRelationshipGraphValid checks whether there is a valid graph.
|
||||
*/
|
||||
bool
|
||||
static bool
|
||||
IsForeignConstraintRelationshipGraphValid()
|
||||
{
|
||||
/*
|
||||
* We might have some concurrent metadata changes. In order to get the changes,
|
||||
* we first need to accept the cache invalidation messages.
|
||||
*/
|
||||
AcceptInvalidationMessages();
|
||||
|
||||
if (fConstraintRelationshipGraph != NULL && fConstraintRelationshipGraph->isValid)
|
||||
{
|
||||
return true;
|
||||
|
|
|
@ -93,7 +93,7 @@ typedef struct MaintenanceDaemonDBData
|
|||
/* config variable for distributed deadlock detection timeout */
|
||||
double DistributedDeadlockDetectionTimeoutFactor = 2.0;
|
||||
int Recover2PCInterval = 60000;
|
||||
int DeferShardDeleteInterval = 60000;
|
||||
int DeferShardDeleteInterval = 15000;
|
||||
|
||||
/* config variables for metadata sync timeout */
|
||||
int MetadataSyncInterval = 60000;
|
||||
|
@ -644,8 +644,8 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
|||
*/
|
||||
lastShardCleanTime = GetCurrentTimestamp();
|
||||
|
||||
bool waitForCleanupLock = false;
|
||||
numberOfDroppedShards = TryDropMarkedShards(waitForCleanupLock);
|
||||
bool waitForLocks = false;
|
||||
numberOfDroppedShards = TryDropOrphanedShards(waitForLocks);
|
||||
}
|
||||
|
||||
CommitTransactionCommand();
|
||||
|
|
|
@ -193,7 +193,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
|
|||
int connectionFlags = OUTSIDE_TRANSACTION;
|
||||
|
||||
MultiConnection *connection = GetNodeUserDatabaseConnection(
|
||||
connectionFlags, "localhost", PostPortNumber,
|
||||
connectionFlags, LocalHostName, PostPortNumber,
|
||||
userName, NULL);
|
||||
|
||||
if (PQstatus(connection->pgConn) == CONNECTION_OK)
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue