mirror of https://github.com/citusdata/citus.git
Compare commits
95 Commits
SHA1:

11db00990f  bd5544fdc5  e5e50570b3  758cda1394  8b4bab14a2  115f2c124a  4b7af5aaaf  6b87b3ea27
d13b989cff  0f62f1a93a  83585e32f9  0f1f55c287  3e8348c29e  537618aaed  6c989830d2  259511746e
bd245b5fbb  25c71fb3d0  28a503fad9  30b46975b8  5f5e5ef471  5a1036e361  6de2a09d79  d485003807
32124efd83  c84d1d9e70  b46f8874d3  1492bd1e8b  4082fab0c9  4ca544200c  e58b78f1e8  f526eec6a8
5759233f15  6640c76bde  11d5d21fd8  4fbed90505  7214673a9f  79a274e226  dd2dfac198  3bcfadf2f1
f5a7858ab9  d7b90e0804  74985a0977  57a52b01a2  c24088e12f  9a2227c70d  826ac1b099  d9514fa697
2f27325b15  f41b5060f0  823ede78ab  2ea3618f22  88825b89a1  a216c6b62c  fcb932268a  1200c8fd1c
0237d826d5  e54b253713  61efc87c53  f5608c2769  ecf0f2fdbf  0a09551dab  0805ef9c79  a6435b7f6b
f13cf336f2  46e316881b  18ab327c6c  61a89c69cd  ad9469b351  4121788848  e9bf5fa235  18c7a3c188
85a87af11c  115fa950d3  445291d94b  28f1c2129d  205b8ec70a  6fa25d73be  bfb1ca6d0d  b355f0d9a2
fdcb6ead43  3fcb011b67  8228815b38  270234c7ff  3131d3e3c5  a7f9dfc3f0  049cd55346  27ecb5cde2
fc08ec203f  495470d291  39a142b4d9  ca4b529751  e48f5d804d  85e2c6b523  2a390b4c1d
@@ -365,7 +365,7 @@ jobs:
          when: on_fail
      - store_artifacts:
          name: 'Save tap logs'
-         path: /home/circleci/project/src/test/recovery/tmp_check/log
+         path: /home/circleci/project/src/test/<< parameters.suite >>/tmp_check/log
          when: on_fail
      - store_artifacts:
          name: 'Save core dumps'

@@ -552,6 +552,12 @@ workflows:
          image_tag: '12.4'
          suite: recovery
          requires: [build-12]
+     - tap-test-citus:
+         name: 'test-12_tap-columnar-freezing'
+         pg_major: 12
+         image_tag: '12.4'
+         suite: columnar_freezing
+         requires: [build-12]
      - test-citus:
          name: 'test-12_check-failure'
          pg_major: 12

@@ -620,6 +626,12 @@ workflows:
          image_tag: '13.0'
          suite: recovery
          requires: [build-13]
+     - tap-test-citus:
+         name: 'test-13_tap-columnar-freezing'
+         pg_major: 13
+         image_tag: '13.0'
+         suite: columnar_freezing
+         requires: [build-13]
      - test-citus:
          name: 'test-13_check-failure'
          pg_major: 13
126 CHANGELOG.md
@@ -1,3 +1,129 @@
### citus v10.0.8 (April 20, 2023) ###

* Fixes a bug that could break `DROP SCHEMA/EXTENSION` commands when there is a columnar table (#5458)

* Fixes a crash that occurs when an aggregate that cannot be pushed down returns an empty result from a worker (#5679)

* Fixes columnar freezing/wraparound bug (#5962)

* Fixes a memory leak issue with query results that return a single row (#6724)

* Prevents alter table functions from dropping extensions (#5974)

### citus v10.0.6 (November 12, 2021) ###

* Adds missing version checks for columnar tables

* Fixes a bug that caused `worker_append_table_to_shard` to write as superuser

* Fixes a bug with local cached plans on tables with dropped columns

* Fixes a missing `FROM` clause entry error

* Fixes a use after free issue that could happen when altering a distributed table

* Reinstates optimisation for uniform shard interval ranges

### citus v10.0.5 (August 16, 2021) ###

* Allows more graceful failovers when replication factor > 1

* Fixes a bug that causes partitions to have wrong distribution key after `DROP COLUMN`

* Improves citus_update_table_statistics and provides distributed deadlock detection

### citus v10.0.4 (July 14, 2021) ###

* Introduces `citus.local_hostname` GUC for connections to the current node

* Removes dependencies on the existence of public schema

* Removes limits around long partition names

* Fixes a bug that can cause a crash when DEBUG4 logging is enabled

* Fixes a bug that causes pruning the incorrect shard of a range distributed table

* Fixes an issue that could cause citus_finish_pg_upgrade to fail

* Fixes FROM ONLY queries on partitioned tables

* Fixes issues caused by public schema being omitted in queries

* Fixes problems with concurrent calls of DropMarkedShards

* Fixes relname null bug when using parallel execution

* Fixes two race conditions in the get_rebalance_progress

### citus v10.0.3 (March 16, 2021) ###

* Prevents infinite recursion for queries that involve `UNION ALL` below `JOIN`

* Fixes a crash in queries with a modifying `CTE` and a `SELECT` without `FROM`

* Fixes upgrade and downgrade paths for `citus_update_table_statistics`

* Fixes a bug that causes `SELECT` queries to use 2PC unnecessarily

* Fixes a bug that might cause self-deadlocks with `CREATE INDEX` / `REINDEX CONCURRENTLY` commands

* Adds `citus.max_cached_connection_lifetime` GUC to set maximum connection lifetime

* Adds `citus.remote_copy_flush_threshold` GUC that controls per-shard memory usage by `COPY`

* Adds `citus_get_active_worker_nodes` UDF to deprecate `master_get_active_worker_nodes`

* Skips 2PC for readonly connections in a transaction

* Makes sure that local execution starts a coordinated transaction

* Removes open temporary file warning when cancelling a query with an open tuple store

* Relaxes the locks when adding an existing node

### citus v10.0.2 (March 3, 2021) ###

* Adds a configure flag to enforce security

* Fixes a bug due to cross join without target list

* Fixes a bug with `UNION ALL` on PG 13

* Fixes a compatibility issue with pg_audit in utility calls

* Fixes insert queries with CTEs/sublinks/subqueries etc.

* Grants `SELECT` permission on `citus_tables` view to `public`

* Grants `SELECT` permission on columnar metadata tables to `public`

* Improves `citus_update_table_statistics` and provides distributed deadlock detection

* Preserves colocation with procedures in `alter_distributed_table`

* Prevents using `alter_columnar_table_set` and `alter_columnar_table_reset` on a columnar table not owned by the user

* Removes limits around long table names

### citus v10.0.1 (February 19, 2021) ###

* Fixes an issue in creation of `pg_catalog.time_partitions` view

### citus v10.0.0 (February 16, 2021) ###

* Adds support for per-table option for columnar storage
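The headline v10.0.0 item above is per-table columnar storage. A minimal illustrative sketch in SQL; the table name is hypothetical and the `compression` parameter name of `alter_columnar_table_set` is an assumption based on the function that appears later in this diff:

    -- create a table that uses the columnar access method
    CREATE TABLE events (event_id bigint, payload jsonb) USING columnar;

    -- set a per-table columnar option (parameter name assumed for illustration)
    SELECT alter_columnar_table_set('events', compression => 'zstd');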
@@ -86,6 +86,7 @@ endif

# Add options passed to configure or computed therein, to CFLAGS/CPPFLAGS/...
override CFLAGS += @CFLAGS@ @CITUS_CFLAGS@
+override BITCODE_CFLAGS := $(BITCODE_CFLAGS) @CITUS_BITCODE_CFLAGS@
ifneq ($(GIT_VERSION),)
override CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\"
endif
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for Citus 10.0devel.
+# Generated by GNU Autoconf 2.69 for Citus 10.0.8.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.

@@ -579,8 +579,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='Citus'
PACKAGE_TARNAME='citus'
-PACKAGE_VERSION='10.0devel'
-PACKAGE_STRING='Citus 10.0devel'
+PACKAGE_VERSION='10.0.8'
+PACKAGE_STRING='Citus 10.0.8'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

@@ -628,8 +628,10 @@ POSTGRES_BUILDDIR
POSTGRES_SRCDIR
CITUS_LDFLAGS
CITUS_CPPFLAGS
CITUS_BITCODE_CFLAGS
CITUS_CFLAGS
GIT_BIN
with_security_flags
with_zstd
with_lz4
EGREP

@@ -696,6 +698,7 @@ with_libcurl
with_reports_hostname
with_lz4
with_zstd
with_security_flags
'
ac_precious_vars='build_alias
host_alias

@@ -1258,7 +1261,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures Citus 10.0devel to adapt to many kinds of systems.
+\`configure' configures Citus 10.0.8 to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1320,7 +1323,7 @@ fi

if test -n "$ac_init_help"; then
  case $ac_init_help in
-    short | recursive ) echo "Configuration of Citus 10.0devel:";;
+    short | recursive ) echo "Configuration of Citus 10.0.8:";;
  esac
  cat <<\_ACEOF

@@ -1342,6 +1345,7 @@ Optional Packages:
                          and update checks
  --without-lz4           do not use lz4
  --without-zstd          do not use zstd
+  --with-security-flags   use security flags

Some influential environment variables:
  PG_CONFIG   Location to find pg_config for target PostgreSQL instalation

@@ -1422,7 +1426,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
  cat <<\_ACEOF
-Citus configure 10.0devel
+Citus configure 10.0.8
generated by GNU Autoconf 2.69

Copyright (C) 2012 Free Software Foundation, Inc.

@@ -1905,7 +1909,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

-It was created by Citus $as_me 10.0devel, which was
+It was created by Citus $as_me 10.0.8, which was
generated by GNU Autoconf 2.69. Invocation command line was

  $ $0 $@
@@ -4346,6 +4350,48 @@ if test x"$citusac_cv_prog_cc_cflags__Werror_return_type" = x"yes"; then
  CITUS_CFLAGS="$CITUS_CFLAGS -Werror=return-type"
fi

# Security flags
# Flags taken from: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10203#guide
# We do not enforce the following flag because it is only available on GCC>=8
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC supports -fstack-clash-protection" >&5
$as_echo_n "checking whether $CC supports -fstack-clash-protection... " >&6; }
if ${citusac_cv_prog_cc_cflags__fstack_clash_protection+:} false; then :
  $as_echo_n "(cached) " >&6
else
  citusac_save_CFLAGS=$CFLAGS
  flag=-fstack-clash-protection
  case $flag in -Wno*)
    flag=-W$(echo $flag | cut -c 6-)
  esac
  CFLAGS="$citusac_save_CFLAGS $flag"
  ac_save_c_werror_flag=$ac_c_werror_flag
  ac_c_werror_flag=yes
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */

int
main ()
{

  ;
  return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
  citusac_cv_prog_cc_cflags__fstack_clash_protection=yes
else
  citusac_cv_prog_cc_cflags__fstack_clash_protection=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="$citusac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $citusac_cv_prog_cc_cflags__fstack_clash_protection" >&5
$as_echo "$citusac_cv_prog_cc_cflags__fstack_clash_protection" >&6; }
if test x"$citusac_cv_prog_cc_cflags__fstack_clash_protection" = x"yes"; then
  CITUS_CFLAGS="$CITUS_CFLAGS -fstack-clash-protection"
fi


#
# --enable-coverage enables generation of code coverage metrics with gcov
@@ -4493,8 +4539,8 @@ if test "$version_num" != '11'; then
$as_echo "#define HAS_TABLEAM 1" >>confdefs.h

else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: postgres version does not support table access methodds" >&5
-$as_echo "$as_me: postgres version does not support table access methodds" >&6;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: postgres version does not support table access methods" >&5
+$as_echo "$as_me: postgres version does not support table access methods" >&6;}
fi;

# Require lz4 & zstd only if we are compiling columnar
@@ -4687,6 +4733,55 @@ fi

fi # test "$HAS_TABLEAM" == 'yes'



# Check whether --with-security-flags was given.
if test "${with_security_flags+set}" = set; then :
  withval=$with_security_flags;
  case $withval in
    yes)
      :
      ;;
    no)
      :
      ;;
    *)
      as_fn_error $? "no argument expected for --with-security-flags option" "$LINENO" 5
      ;;
  esac

else
  with_security_flags=no

fi


if test "$with_security_flags" = yes; then
  # Flags taken from: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10203#guide

  # We always want to have some compiler flags for security concerns.
  SECURITY_CFLAGS="-fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -z noexecstack -fpic -shared -Wl,-z,relro -Wl,-z,now -Wformat -Wformat-security -Werror=format-security"
  CITUS_CFLAGS="$CITUS_CFLAGS $SECURITY_CFLAGS"
  { $as_echo "$as_me:${as_lineno-$LINENO}: Blindly added security flags for linker: $SECURITY_CFLAGS" >&5
$as_echo "$as_me: Blindly added security flags for linker: $SECURITY_CFLAGS" >&6;}

  # We always want to have some clang flags for security concerns.
  # This doesn't include "-Wl,-z,relro -Wl,-z,now" on purpose, because bitcode is not linked.
  # This doesn't include -fsanitize=cfi because it breaks builds on many distros including
  # Debian/Buster, Debian/Stretch, Ubuntu/Bionic, Ubuntu/Xenial and EL7.
  SECURITY_BITCODE_CFLAGS="-fsanitize=safe-stack -fstack-protector-strong -flto -fPIC -Wformat -Wformat-security -Werror=format-security"
  CITUS_BITCODE_CFLAGS="$CITUS_BITCODE_CFLAGS $SECURITY_BITCODE_CFLAGS"
  { $as_echo "$as_me:${as_lineno-$LINENO}: Blindly added security flags for llvm: $SECURITY_BITCODE_CFLAGS" >&5
$as_echo "$as_me: Blindly added security flags for llvm: $SECURITY_BITCODE_CFLAGS" >&6;}

  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: If you run into issues during linking or bitcode compilation, you can use --without-security-flags." >&5
$as_echo "$as_me: WARNING: If you run into issues during linking or bitcode compilation, you can use --without-security-flags." >&2;}
fi

# Check if git is installed, when installed the gitref of the checkout will be baked in the application
# Extract the first word of "git", so it can be a program name with args.
set dummy git; ac_word=$2
@@ -4752,6 +4847,8 @@ fi

CITUS_CFLAGS="$CITUS_CFLAGS"

CITUS_BITCODE_CFLAGS="$CITUS_BITCODE_CFLAGS"

CITUS_CPPFLAGS="$CITUS_CPPFLAGS"

CITUS_LDFLAGS="$LIBS $CITUS_LDFLAGS"

@@ -5276,7 +5373,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by Citus $as_me 10.0devel, which was
+This file was extended by Citus $as_me 10.0.8, which was
generated by GNU Autoconf 2.69. Invocation command line was

  CONFIG_FILES = $CONFIG_FILES

@@ -5338,7 +5435,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-Citus config.status 10.0devel
+Citus config.status 10.0.8
configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"
33 configure.in
@@ -5,7 +5,7 @@
# everyone needing autoconf installed, the resulting files are checked
# into the SCM.

-AC_INIT([Citus], [10.0devel])
+AC_INIT([Citus], [10.0.8])
AC_COPYRIGHT([Copyright (c) Citus Data, Inc.])

# we'll need sed and awk for some of the version commands

@@ -174,6 +174,10 @@ CITUSAC_PROG_CC_CFLAGS_OPT([-Werror=vla]) # visual studio does not support thes
CITUSAC_PROG_CC_CFLAGS_OPT([-Werror=implicit-int])
CITUSAC_PROG_CC_CFLAGS_OPT([-Werror=implicit-function-declaration])
CITUSAC_PROG_CC_CFLAGS_OPT([-Werror=return-type])
+# Security flags
+# Flags taken from: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10203#guide
+# We do not enforce the following flag because it is only available on GCC>=8
+CITUSAC_PROG_CC_CFLAGS_OPT([-fstack-clash-protection])

#
# --enable-coverage enables generation of code coverage metrics with gcov

@@ -216,7 +220,7 @@ if test "$version_num" != '11'; then
  HAS_TABLEAM=yes
  AC_DEFINE([HAS_TABLEAM], 1, [Define to 1 to build with table access method support, pg12 and up])
else
-  AC_MSG_NOTICE([postgres version does not support table access methodds])
+  AC_MSG_NOTICE([postgres version does not support table access methods])
fi;

# Require lz4 & zstd only if we are compiling columnar

@@ -261,11 +265,36 @@ if test "$HAS_TABLEAM" == 'yes'; then

fi # test "$HAS_TABLEAM" == 'yes'

PGAC_ARG_BOOL(with, security-flags, no,
              [use security flags])
AC_SUBST(with_security_flags)

if test "$with_security_flags" = yes; then
  # Flags taken from: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10203#guide

  # We always want to have some compiler flags for security concerns.
  SECURITY_CFLAGS="-fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -z noexecstack -fpic -shared -Wl,-z,relro -Wl,-z,now -Wformat -Wformat-security -Werror=format-security"
  CITUS_CFLAGS="$CITUS_CFLAGS $SECURITY_CFLAGS"
  AC_MSG_NOTICE([Blindly added security flags for linker: $SECURITY_CFLAGS])

  # We always want to have some clang flags for security concerns.
  # This doesn't include "-Wl,-z,relro -Wl,-z,now" on purpose, because bitcode is not linked.
  # This doesn't include -fsanitize=cfi because it breaks builds on many distros including
  # Debian/Buster, Debian/Stretch, Ubuntu/Bionic, Ubuntu/Xenial and EL7.
  SECURITY_BITCODE_CFLAGS="-fsanitize=safe-stack -fstack-protector-strong -flto -fPIC -Wformat -Wformat-security -Werror=format-security"
  CITUS_BITCODE_CFLAGS="$CITUS_BITCODE_CFLAGS $SECURITY_BITCODE_CFLAGS"
  AC_MSG_NOTICE([Blindly added security flags for llvm: $SECURITY_BITCODE_CFLAGS])

  AC_MSG_WARN([If you run into issues during linking or bitcode compilation, you can use --without-security-flags.])
fi

# Check if git is installed, when installed the gitref of the checkout will be baked in the application
AC_PATH_PROG(GIT_BIN, git)
AC_CHECK_FILE(.git,[HAS_DOTGIT=yes], [HAS_DOTGIT=])

AC_SUBST(CITUS_CFLAGS, "$CITUS_CFLAGS")
+AC_SUBST(CITUS_BITCODE_CFLAGS, "$CITUS_BITCODE_CFLAGS")
AC_SUBST(CITUS_CPPFLAGS, "$CITUS_CPPFLAGS")
AC_SUBST(CITUS_LDFLAGS, "$LIBS $CITUS_LDFLAGS")
AC_SUBST(POSTGRES_SRCDIR, "$POSTGRES_SRCDIR")
@@ -311,8 +311,13 @@ DeleteColumnarTableOptions(Oid regclass, bool missingOk)
     */
    Assert(!IsBinaryUpgrade);

-   Relation columnarOptions = relation_open(ColumnarOptionsRelationId(),
-                                            RowExclusiveLock);
+   Relation columnarOptions = try_relation_open(ColumnarOptionsRelationId(),
+                                                RowExclusiveLock);
+   if (columnarOptions == NULL)
+   {
+       /* extension has been dropped */
+       return false;
+   }

    /* find existing item to remove */
    ScanKeyData scanKey[1] = { 0 };

@@ -1087,7 +1092,11 @@ DatumToBytea(Datum value, Form_pg_attribute attrForm)
    {
        if (attrForm->attbyval)
        {
-           store_att_byval(VARDATA(result), value, attrForm->attlen);
+           Datum tmp;
+           store_att_byval(&tmp, value, attrForm->attlen);
+
+           memcpy_s(VARDATA(result), datumLength + VARHDRSZ,
+                    &tmp, attrForm->attlen);
        }
        else
        {
@@ -29,6 +29,7 @@
#else
#include "optimizer/clauses.h"
#include "optimizer/predtest.h"
+#include "optimizer/var.h"
#endif
#include "optimizer/restrictinfo.h"
#include "storage/fd.h"

@@ -62,6 +63,8 @@ struct TableReadState
    List *projectedColumnList;

    List *whereClauseList;
+   List *whereClauseVars;

    MemoryContext stripeReadContext;
    StripeBuffers *stripeBuffers;
    uint32 readStripeCount;

@@ -77,6 +80,7 @@ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation,
                                                 TupleDesc tupleDescriptor,
                                                 List *projectedColumnList,
                                                 List *whereClauseList,
+                                                List *whereClauseVars,
                                                 int64 *chunkGroupsFiltered);
static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList,
                              uint64 chunkIndex, uint64 chunkRowIndex,

@@ -87,10 +91,11 @@ static ColumnBuffers * LoadColumnBuffers(Relation relation,
                                         uint32 chunkCount, uint64 stripeOffset,
                                         Form_pg_attribute attributeForm);
static bool * SelectedChunkMask(StripeSkipList *stripeSkipList,
-                               List *projectedColumnList, List *whereClauseList,
+                               List *whereClauseList, List *whereClauseVars,
                                int64 *chunkGroupsFiltered);
static List * BuildRestrictInfoList(List *whereClauseList);
static Node * BuildBaseConstraint(Var *variable);
+static List * GetClauseVars(List *clauses, int natts);
static OpExpr * MakeOpExpression(Var *variable, int16 strategyNumber);
static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber);
static void UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue);

@@ -142,6 +147,7 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
    readState->stripeList = stripeList;
    readState->projectedColumnList = projectedColumnList;
    readState->whereClauseList = whereClauseList;
+   readState->whereClauseVars = GetClauseVars(whereClauseList, tupleDescriptor->natts);
    readState->stripeBuffers = NULL;
    readState->readStripeCount = 0;
    readState->stripeReadRowCount = 0;

@@ -218,6 +224,8 @@ ColumnarReadNextRow(TableReadState *readState, Datum *columnValues, bool *column
                                              projectedColumnList,
                                              readState->whereClauseList,
+                                             readState->whereClauseVars,
                                              &readState->chunkGroupsFiltered);
        readState->readStripeCount++;

@@ -400,7 +408,8 @@ ColumnarTableRowCount(Relation relation)
static StripeBuffers *
LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
                          TupleDesc tupleDescriptor, List *projectedColumnList,
-                         List *whereClauseList, int64 *chunkGroupsFiltered)
+                         List *whereClauseList, List *whereClauseVars,
+                         int64 *chunkGroupsFiltered)
{
    uint32 columnIndex = 0;
    uint32 columnCount = tupleDescriptor->natts;

@@ -412,8 +421,8 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
                                                          tupleDescriptor,
                                                          stripeMetadata->chunkCount);

-   bool *selectedChunkMask = SelectedChunkMask(stripeSkipList, projectedColumnList,
-                                               whereClauseList, chunkGroupsFiltered);
+   bool *selectedChunkMask = SelectedChunkMask(stripeSkipList, whereClauseList,
+                                               whereClauseVars, chunkGroupsFiltered);

    StripeSkipList *selectedChunkSkipList =
        SelectedChunkSkipList(stripeSkipList, projectedColumnMask,

@@ -551,8 +560,8 @@ LoadColumnBuffers(Relation relation, ColumnChunkSkipNode *chunkSkipNodeArray,
 * the chunk can be refuted by the given qualifier conditions.
 */
static bool *
-SelectedChunkMask(StripeSkipList *stripeSkipList, List *projectedColumnList,
-                  List *whereClauseList, int64 *chunkGroupsFiltered)
+SelectedChunkMask(StripeSkipList *stripeSkipList, List *whereClauseList,
+                  List *whereClauseVars, int64 *chunkGroupsFiltered)
{
    ListCell *columnCell = NULL;
    uint32 chunkIndex = 0;

@@ -561,7 +570,7 @@ SelectedChunkMask(StripeSkipList *stripeSkipList, List *projectedColumnList,
    bool *selectedChunkMask = palloc0(stripeSkipList->chunkCount * sizeof(bool));
    memset(selectedChunkMask, true, stripeSkipList->chunkCount * sizeof(bool));

-   foreach(columnCell, projectedColumnList)
+   foreach(columnCell, whereClauseVars)
    {
        Var *column = lfirst(columnCell);
        uint32 columnIndex = column->varattno - 1;

@@ -693,6 +702,58 @@ BuildBaseConstraint(Var *variable)
}


/*
 * GetClauseVars extracts the Vars from the given clauses for the purpose of
 * building constraints that can be refuted by predicate_refuted_by(). It also
 * deduplicates and sorts them.
 */
static List *
GetClauseVars(List *whereClauseList, int natts)
{
    /*
     * We don't recurse into or include aggregates, window functions, or
     * PHVs. We don't expect any PHVs during execution; and Vars found inside
     * an aggregate or window function aren't going to be useful in forming
     * constraints that can be refuted.
     */
    int flags = 0;
    List *vars = pull_var_clause((Node *) whereClauseList, flags);
    Var **deduplicate = palloc0(sizeof(Var *) * natts);

    ListCell *lc;
    foreach(lc, vars)
    {
        Node *node = lfirst(lc);
        Assert(IsA(node, Var));

        Var *var = (Var *) node;
        int idx = var->varattno - 1;

        if (deduplicate[idx] != NULL)
        {
            /* if they have the same varattno, the rest should be identical */
            Assert(equal(var, deduplicate[idx]));
        }

        deduplicate[idx] = var;
    }

    List *whereClauseVars = NIL;
    for (int i = 0; i < natts; i++)
    {
        Var *var = deduplicate[i];
        if (var != NULL)
        {
            whereClauseVars = lappend(whereClauseVars, var);
        }
    }

    pfree(deduplicate);

    return whereClauseVars;
}


/*
 * MakeOpExpression builds an operator expression node. This operator expression
 * implements the operator clause as defined by the variable and the strategy
@@ -160,6 +160,8 @@ columnar_beginscan(Relation relation, Snapshot snapshot,
                   ParallelTableScanDesc parallel_scan,
                   uint32 flags)
{
+   CheckCitusVersion(ERROR);

    int natts = relation->rd_att->natts;
    Bitmapset *attr_needed = NULL;

@@ -419,6 +421,8 @@ static bool
columnar_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
                                  Snapshot snapshot)
{
+   CheckCitusVersion(ERROR);

    return true;
}

@@ -436,6 +440,8 @@ static void
columnar_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
                      int options, BulkInsertState bistate)
{
+   CheckCitusVersion(ERROR);

    /*
     * columnar_init_write_state allocates the write state in a longer
     * lasting context, so no need to worry about it.

@@ -481,6 +487,8 @@ static void
columnar_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                      CommandId cid, int options, BulkInsertState bistate)
{
+   CheckCitusVersion(ERROR);

    TableWriteState *writeState = columnar_init_write_state(relation,
                                                             RelationGetDescr(relation),
                                                             GetCurrentSubTransactionId());

@@ -552,6 +560,8 @@ columnar_relation_set_new_filenode(Relation rel,
                                   TransactionId *freezeXid,
                                   MultiXactId *minmulti)
{
+   CheckCitusVersion(ERROR);

    if (persistence != RELPERSISTENCE_PERMANENT)
    {
        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

@@ -581,6 +591,8 @@ columnar_relation_set_new_filenode(Relation rel,
static void
columnar_relation_nontransactional_truncate(Relation rel)
{
+   CheckCitusVersion(ERROR);

    RelFileNode relfilenode = rel->rd_node;

    NonTransactionDropWriteState(relfilenode.relNode);

@@ -625,6 +637,8 @@ columnar_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
                                   double *tups_vacuumed,
                                   double *tups_recently_dead)
{
+   CheckCitusVersion(ERROR);

    TupleDesc sourceDesc = RelationGetDescr(OldHeap);
    TupleDesc targetDesc = RelationGetDescr(NewHeap);

@@ -670,6 +684,27 @@ columnar_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
}


/*
 * ColumnarTableTupleCount returns the number of tuples that columnar
 * table with relationId has by using stripe metadata.
 */
static uint64
ColumnarTableTupleCount(Relation relation)
{
    List *stripeList = StripesForRelfilenode(relation->rd_node);
    uint64 tupleCount = 0;

    ListCell *lc = NULL;
    foreach(lc, stripeList)
    {
        StripeMetadata *stripe = lfirst(lc);
        tupleCount += stripe->rowCount;
    }

    return tupleCount;
}


/*
 * columnar_vacuum_rel implements VACUUM without FULL option.
 */
@@ -677,6 +712,18 @@ static void
columnar_vacuum_rel(Relation rel, VacuumParams *params,
                    BufferAccessStrategy bstrategy)
{
    if (!CheckCitusVersion(WARNING))
    {
        /*
         * Skip if the extension catalogs are not up-to-date, but avoid
         * erroring during auto-vacuum.
         */
        return;
    }

    pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
                                  RelationGetRelid(rel));

    int elevel = (params->options & VACOPT_VERBOSE) ? INFO : DEBUG2;

    /* this should have been resolved by vacuum.c until now */

@@ -692,6 +739,52 @@ columnar_vacuum_rel(Relation rel, VacuumParams *params,
    {
        TruncateColumnar(rel, elevel);
    }

    RelationOpenSmgr(rel);
    BlockNumber new_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);

    /* get the number of indexes */
    List *indexList = RelationGetIndexList(rel);
    int nindexes = list_length(indexList);

    TransactionId oldestXmin;
    TransactionId freezeLimit;
    MultiXactId multiXactCutoff;

    /* initialize xids */
    TransactionId xidFullScanLimit;
    MultiXactId mxactFullScanLimit;
    vacuum_set_xid_limits(rel,
                          params->freeze_min_age,
                          params->freeze_table_age,
                          params->multixact_freeze_min_age,
                          params->multixact_freeze_table_age,
                          &oldestXmin, &freezeLimit, &xidFullScanLimit,
                          &multiXactCutoff, &mxactFullScanLimit);

    Assert(TransactionIdPrecedesOrEquals(freezeLimit, oldestXmin));

    /*
     * Columnar storage doesn't hold any transaction IDs, so we can always
     * just advance to the most aggressive value.
     */
    TransactionId newRelFrozenXid = oldestXmin;
    MultiXactId newRelminMxid = multiXactCutoff;

    double new_live_tuples = ColumnarTableTupleCount(rel);

    /* all visible pages are always 0 */
    BlockNumber new_rel_allvisible = 0;

    vac_update_relstats(rel, new_rel_pages, new_live_tuples,
                        new_rel_allvisible, nindexes > 0,
                        newRelFrozenXid, newRelminMxid, false);

    pgstat_report_vacuum(RelationGetRelid(rel),
                         rel->rd_rel->relisshared,
                         Max(new_live_tuples, 0),
                         0);
    pgstat_progress_end_command();
}
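This hunk is the columnar freezing/wraparound fix from the changelog (#5962): VACUUM on a columnar table now updates relfrozenxid and relminmxid itself instead of leaving them behind. A minimal way to observe the effect from SQL; the table name is hypothetical:

    -- assuming a columnar table named "events" already exists
    VACUUM events;

    -- relfrozenxid/relminmxid should now be advanced by the columnar VACUUM handler
    SELECT relname, relfrozenxid, relminmxid
    FROM pg_class
    WHERE relname = 'events';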
@@ -1006,6 +1099,8 @@ columnar_index_validate_scan(Relation heapRelation,
static uint64
columnar_relation_size(Relation rel, ForkNumber forkNumber)
{
+   CheckCitusVersion(ERROR);

    uint64 nblocks = 0;

    /* Open it at the smgr level if not already done */

@@ -1031,6 +1126,8 @@ columnar_relation_size(Relation rel, ForkNumber forkNumber)
static bool
columnar_relation_needs_toast_table(Relation rel)
{
+   CheckCitusVersion(ERROR);

    return false;
}

@@ -1040,6 +1137,8 @@ columnar_estimate_rel_size(Relation rel, int32 *attr_widths,
                           BlockNumber *pages, double *tuples,
                           double *allvisfrac)
{
+   CheckCitusVersion(ERROR);

    RelationOpenSmgr(rel);
    *pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
    *tuples = ColumnarTableRowCount(rel);

@@ -1218,6 +1317,8 @@ ColumnarTableDropHook(Oid relid)

    if (IsColumnarTableAmTable(relid))
    {
+       CheckCitusVersion(ERROR);

        /*
         * Drop metadata. No need to drop storage here since for
         * tableam tables storage is managed by postgres.
@@ -1653,6 +1754,8 @@ PG_FUNCTION_INFO_V1(alter_columnar_table_set);
Datum
alter_columnar_table_set(PG_FUNCTION_ARGS)
{
+   CheckCitusVersion(ERROR);

    Oid relationId = PG_GETARG_OID(0);

    Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */

@@ -1662,6 +1765,8 @@ alter_columnar_table_set(PG_FUNCTION_ARGS)
                               quote_identifier(RelationGetRelationName(rel)))));
    }

+   EnsureTableOwner(relationId);

    ColumnarOptions options = { 0 };
    if (!ReadColumnarOptions(relationId, &options))
    {

@@ -1760,6 +1865,8 @@ PG_FUNCTION_INFO_V1(alter_columnar_table_reset);
Datum
alter_columnar_table_reset(PG_FUNCTION_ARGS)
{
+   CheckCitusVersion(ERROR);

    Oid relationId = PG_GETARG_OID(0);

    Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */

@@ -1769,6 +1876,8 @@ alter_columnar_table_reset(PG_FUNCTION_ARGS)
                               quote_identifier(RelationGetRelationName(rel)))));
    }

+   EnsureTableOwner(relationId);

    ColumnarOptions options = { 0 };
    if (!ReadColumnarOptions(relationId, &options))
    {
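The added `EnsureTableOwner` calls implement the changelog item that restricts `alter_columnar_table_set` and `alter_columnar_table_reset` to the table owner. A rough sketch of the expected behaviour from SQL; the role and table names are hypothetical and the `compression` parameter name is an assumption:

    -- as the owner: allowed
    CREATE TABLE metrics (ts timestamptz, value double precision) USING columnar;
    SELECT alter_columnar_table_set('metrics', compression => 'none');

    -- as a role that does not own the table: now rejected with an ownership error
    SET ROLE analyst;
    SELECT alter_columnar_table_set('metrics', compression => 'none');
    RESET ROLE;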
@@ -0,0 +1,5 @@
/* columnar--10.0-1--10.0-2.sql */

-- grant read access for columnar metadata tables to unprivileged user
GRANT USAGE ON SCHEMA columnar TO PUBLIC;
GRANT SELECT ON ALL tables IN SCHEMA columnar TO PUBLIC;

@@ -0,0 +1,5 @@
/* columnar--10.0-2--10.0-1.sql */

-- revoke read access for columnar metadata tables from unprivileged user
REVOKE USAGE ON SCHEMA columnar FROM PUBLIC;
REVOKE SELECT ON ALL tables IN SCHEMA columnar FROM PUBLIC;
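These upgrade/downgrade scripts back the changelog item granting `public` read access to columnar metadata. A sketch of what an unprivileged role can now do; the role name is hypothetical and the exact metadata relation names in the `columnar` schema are assumptions:

    SET ROLE reporting_user;
    -- read-only introspection of columnar metadata, e.g. per-table options
    SELECT * FROM columnar.options;
    RESET ROLE;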
@@ -1,6 +1,6 @@
# Citus extension
comment = 'Citus distributed database'
-default_version = '10.0-1'
+default_version = '10.0-4'
module_pathname = '$libdir/citus'
relocatable = false
schema = pg_catalog
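With `default_version` bumped to `10.0-4`, an existing installation picks up the new extension scripts (including the columnar GRANTs above) through the standard PostgreSQL upgrade path. A minimal sketch, run after the new packages are installed on each node:

    ALTER EXTENSION citus UPDATE TO '10.0-4';

    -- or simply move to the latest installed version
    ALTER EXTENSION citus UPDATE;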
@@ -29,9 +29,12 @@
#include "fmgr.h"

+#include "access/hash.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/pg_am.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_rewrite_d.h"
#include "columnar/columnar.h"
#include "columnar/columnar_tableam.h"
#include "distributed/colocation_utils.h"

@@ -43,12 +46,15 @@
#include "distributed/listutils.h"
#include "distributed/local_executor.h"
#include "distributed/metadata/dependency.h"
#include "distributed/metadata/distobject.h"
#include "distributed/metadata_cache.h"
#include "distributed/metadata_sync.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_logical_planner.h"
#include "distributed/multi_partitioning_utils.h"
#include "distributed/reference_table_utils.h"
#include "distributed/relation_access_tracking.h"
#include "distributed/shard_utils.h"
#include "distributed/worker_protocol.h"
#include "distributed/worker_transaction.h"
#include "executor/spi.h"
@@ -175,6 +181,8 @@ static TableConversionReturn * AlterDistributedTable(TableConversionParameters *
static TableConversionReturn * AlterTableSetAccessMethod(
    TableConversionParameters *params);
static TableConversionReturn * ConvertTable(TableConversionState *con);
+static bool SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName,
+                                                                  char *longestShardName);
static void EnsureTableNotReferencing(Oid relationId, char conversionType);
static void EnsureTableNotReferenced(Oid relationId, char conversionType);
static void EnsureTableNotForeign(Oid relationId);

@@ -198,6 +206,8 @@ static bool WillRecreateForeignKeyToReferenceTable(Oid relationId,
                                                   CascadeToColocatedOption cascadeOption);
static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId);
static void ExecuteQueryViaSPI(char *query, int SPIOK);
+static void ErrorIfUnsupportedCascadeObjects(Oid relationId);
+static bool DoesCascadeDropUnsupportedObject(Oid classId, Oid id, HTAB *nodeMap);

PG_FUNCTION_INFO_V1(undistribute_table);
PG_FUNCTION_INFO_V1(alter_distributed_table);
@@ -375,6 +385,8 @@ UndistributeTable(TableConversionParameters *params)
        ErrorIfAnyPartitionRelationInvolvedInNonInheritedFKey(partitionList);
    }

+   ErrorIfUnsupportedCascadeObjects(params->relationId);

    params->conversionType = UNDISTRIBUTE_TABLE;
    params->shardCountIsNull = true;
    TableConversionState *con = CreateTableConversion(params);

@@ -406,6 +418,8 @@ AlterDistributedTable(TableConversionParameters *params)
    EnsureTableNotPartition(params->relationId);
    EnsureHashDistributedTable(params->relationId);

+   ErrorIfUnsupportedCascadeObjects(params->relationId);

    params->conversionType = ALTER_DISTRIBUTED_TABLE;
    TableConversionState *con = CreateTableConversion(params);
    CheckAlterDistributedTableConversionParameters(con);

@@ -467,6 +481,8 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
        }
    }

+   ErrorIfUnsupportedCascadeObjects(params->relationId);

    params->conversionType = ALTER_TABLE_SET_ACCESS_METHOD;
    params->shardCountIsNull = true;
    TableConversionState *con = CreateTableConversion(params);
@@ -511,6 +527,10 @@ ConvertTable(TableConversionState *con)
    bool oldEnableLocalReferenceForeignKeys = EnableLocalReferenceForeignKeys;
    SetLocalEnableLocalReferenceForeignKeys(false);

+   /* switch to sequential execution if shard names will be too long */
+   SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(con->relationId,
+                                                            con->relationName);

    if (con->conversionType == UNDISTRIBUTE_TABLE && con->cascadeViaForeignKeys &&
        (TableReferencing(con->relationId) || TableReferenced(con->relationId)))
    {

@@ -673,7 +693,7 @@ ConvertTable(TableConversionState *con)
        Node *parseTree = ParseTreeNode(tableCreationSql);

        RelayEventExtendNames(parseTree, con->schemaName, con->hashOfName);
-       ProcessUtilityParseTree(parseTree, tableCreationSql, PROCESS_UTILITY_TOPLEVEL,
+       ProcessUtilityParseTree(parseTree, tableCreationSql, PROCESS_UTILITY_QUERY,
                                NULL, None_Receiver, NULL);
    }

@@ -711,6 +731,32 @@ ConvertTable(TableConversionState *con)
        CreateCitusTableLike(con);
    }

    /* preserve colocation with procedures/functions */
    if (con->conversionType == ALTER_DISTRIBUTED_TABLE)
    {
        /*
         * Updating the colocationId of functions is always desirable for
         * the following scenario:
         * we have shardCount or colocateWith change
         * AND entire co-location group is altered
         * The reason for the second condition is because we currently don't
         * remember the original table specified in the colocateWith when
         * distributing the function. We only remember the colocationId in
         * pg_dist_object table.
         */
        if ((!con->shardCountIsNull || con->colocateWith != NULL) &&
            (con->cascadeToColocated == CASCADE_TO_COLOCATED_YES || list_length(
                 con->colocatedTableList) == 1) && con->distributionColumn == NULL)
        {
            /*
             * Update the colocationId from the one of the old relation to the one
             * of the new relation for all tuples in citus.pg_dist_object
             */
            UpdateDistributedObjectColocationId(TableColocationId(con->relationId),
                                                TableColocationId(con->newRelationId));
        }
    }

    ReplaceTable(con->relationId, con->newRelationId, justBeforeDropCommands,
                 con->suppressNoticeMessages);

@@ -728,7 +774,7 @@ ConvertTable(TableConversionState *con)
        Node *parseTree = ParseTreeNode(attachPartitionCommand);

        ProcessUtilityParseTree(parseTree, attachPartitionCommand,
-                               PROCESS_UTILITY_TOPLEVEL,
+                               PROCESS_UTILITY_QUERY,
                                NULL, None_Receiver, NULL);
    }
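The colocation-preservation block above corresponds to the changelog item "Preserves colocation with procedures in alter_distributed_table". A rough SQL sketch of the scenario it covers; the function, table, and argument values are assumptions for illustration:

    -- a distributed function colocated with a distributed table
    SELECT create_distributed_function('process_order(bigint)', '$1', colocate_with => 'orders');

    -- changing the shard count moves the whole colocation group; the function's
    -- entry in pg_dist_object is updated to the new colocation id
    SELECT alter_distributed_table('orders', shard_count => 64, cascade_to_colocated => true);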
@@ -1042,6 +1088,30 @@ CreateDistributedTableLike(TableConversionState *con)
    {
        newShardCount = con->shardCount;
    }

    Oid originalRelationId = con->relationId;
    if (con->originalDistributionKey != NULL && PartitionTable(originalRelationId))
    {
        /*
         * Due to dropped columns, the partition tables might have different
         * distribution keys than their parents, see issue #5123 for details.
         *
         * At this point, we get the partitioning information from the
         * originalRelationId, but we get the distribution key for newRelationId.
         *
         * We have to do this, because the newRelationId is just a placeholder
         * at this moment, but that's going to be the table in pg_dist_partition.
         */
        Oid parentRelationId = PartitionParentOid(originalRelationId);
        Var *parentDistKey = DistPartitionKey(parentRelationId);
        char *parentDistKeyColumnName =
            ColumnToColumnName(parentRelationId, nodeToString(parentDistKey));

        newDistributionKey =
            FindColumnWithNameOnTargetRelation(parentRelationId, parentDistKeyColumnName,
                                               con->newRelationId);
    }

    char partitionMethod = PartitionMethod(con->relationId);
    CreateDistributedTable(con->newRelationId, newDistributionKey, partitionMethod,
                           newShardCount, newColocateWith, false);
@@ -1077,6 +1147,94 @@ CreateCitusTableLike(TableConversionState *con)
}


/*
 * ErrorIfUnsupportedCascadeObjects gets oid of a relation, finds the objects
 * that dropping this relation cascades into and errors if there are any extensions
 * that would be dropped.
 */
static void
ErrorIfUnsupportedCascadeObjects(Oid relationId)
{
    HASHCTL info;
    memset(&info, 0, sizeof(info));
    info.keysize = sizeof(Oid);
    info.entrysize = sizeof(Oid);
    info.hash = oid_hash;
    uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION);
    HTAB *nodeMap = hash_create("object dependency map (oid)", 64, &info, hashFlags);

    bool unsupportedObjectInDepGraph =
        DoesCascadeDropUnsupportedObject(RelationRelationId, relationId, nodeMap);

    if (unsupportedObjectInDepGraph)
    {
        ereport(ERROR, (errmsg("cannot alter table because an extension depends on it")));
    }
}


/*
 * DoesCascadeDropUnsupportedObject walks through the objects that depend on the
 * object with object id and returns true if it finds any unsupported objects.
 *
 * This function only checks extensions as unsupported objects.
 *
 * Extension dependency is different than the rest. If an object depends on an extension
 * dropping the object would drop the extension too.
 * So we check with IsObjectAddressOwnedByExtension function.
 */
static bool
DoesCascadeDropUnsupportedObject(Oid classId, Oid objectId, HTAB *nodeMap)
{
    bool found = false;
    hash_search(nodeMap, &objectId, HASH_ENTER, &found);

    if (found)
    {
        return false;
    }

    ObjectAddress objectAddress = { 0 };
    ObjectAddressSet(objectAddress, classId, objectId);

    if (IsObjectAddressOwnedByExtension(&objectAddress, NULL))
    {
        return true;
    }

    Oid targetObjectClassId = classId;
    Oid targetObjectId = objectId;
    List *dependencyTupleList = GetPgDependTuplesForDependingObjects(targetObjectClassId,
                                                                     targetObjectId);

    HeapTuple depTup = NULL;
    foreach_ptr(depTup, dependencyTupleList)
    {
        Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);

        Oid dependingOid = InvalidOid;
        Oid dependingClassId = InvalidOid;

        if (pg_depend->classid == RewriteRelationId)
        {
            dependingOid = GetDependingView(pg_depend);
            dependingClassId = RelationRelationId;
        }
        else
        {
            dependingOid = pg_depend->objid;
            dependingClassId = pg_depend->classid;
        }

        if (DoesCascadeDropUnsupportedObject(dependingClassId, dependingOid, nodeMap))
        {
            return true;
        }
    }
    return false;
}


/*
 * GetViewCreationCommandsOfTable takes a table oid generates the CREATE VIEW
 * commands for views that depend to the given table. This includes the views
@@ -1134,7 +1292,7 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
{
    if (!suppressNoticeMessages)
    {
-       ereport(NOTICE, (errmsg("Moving the data of %s",
+       ereport(NOTICE, (errmsg("moving the data of %s",
                                quote_qualified_identifier(schemaName, sourceName))));
    }

@@ -1207,7 +1365,7 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,

    if (!suppressNoticeMessages)
    {
-       ereport(NOTICE, (errmsg("Dropping the old %s",
+       ereport(NOTICE, (errmsg("dropping the old %s",
                                quote_qualified_identifier(schemaName, sourceName))));
    }

@@ -1218,7 +1376,7 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,

    if (!suppressNoticeMessages)
    {
-       ereport(NOTICE, (errmsg("Renaming the new table to %s",
+       ereport(NOTICE, (errmsg("renaming the new table to %s",
                                quote_qualified_identifier(schemaName, sourceName))));
    }
@@ -1572,3 +1730,132 @@ ExecuteQueryViaSPI(char *query, int SPIOK)
        ereport(ERROR, (errmsg("could not finish SPI connection")));
    }
}


/*
 * SwitchToSequentialAndLocalExecutionIfRelationNameTooLong generates the longest shard name
 * on the shards of a distributed table and, if it exceeds the limit, switches to sequential
 * and local execution to prevent self-deadlocks.
 *
 * In case of a RENAME, the relation name parameter should store the new table name, so
 * that the function can generate shard names of the renamed relations.
 */
void
SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(Oid relationId,
                                                         char *finalRelationName)
{
    if (!IsCitusTable(relationId))
    {
        return;
    }

    if (ShardIntervalCount(relationId) == 0)
    {
        /*
         * Relation has no shards, so we cannot run into "long shard relation
         * name" issue.
         */
        return;
    }

    char *longestShardName = GetLongestShardName(relationId, finalRelationName);
    bool switchedToSequentialAndLocalExecution =
        SwitchToSequentialAndLocalExecutionIfShardNameTooLong(finalRelationName,
                                                              longestShardName);

    if (switchedToSequentialAndLocalExecution)
    {
        return;
    }

    if (PartitionedTable(relationId))
    {
        Oid longestNamePartitionId = PartitionWithLongestNameRelationId(relationId);
        if (!OidIsValid(longestNamePartitionId))
        {
            /* no partitions have been created yet */
            return;
        }

        char *longestPartitionName = get_rel_name(longestNamePartitionId);
        char *longestPartitionShardName = NULL;

        /*
         * Use the shardId values of the partition if it is distributed, otherwise use
         * hypothetical values
         */
        if (IsCitusTable(longestNamePartitionId) &&
            ShardIntervalCount(longestNamePartitionId) > 0)
        {
            longestPartitionShardName =
                GetLongestShardName(longestNamePartitionId, longestPartitionName);
        }
        else
        {
            longestPartitionShardName =
                GetLongestShardNameForLocalPartition(relationId, longestPartitionName);
        }

        SwitchToSequentialAndLocalExecutionIfShardNameTooLong(longestPartitionName,
                                                              longestPartitionShardName);
    }
}


/*
 * SwitchToSequentialAndLocalExecutionIfShardNameTooLong switches to sequential and local
 * execution if the shard name is too long.
 *
 * Returns true if it switched to sequential and local execution.
 */
static bool
SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName,
                                                      char *longestShardName)
{
    if (strlen(longestShardName) >= NAMEDATALEN - 1)
    {
        if (ParallelQueryExecutedInTransaction())
        {
            /*
             * If there has already been a parallel query executed, the sequential mode
             * would still use the already opened parallel connections to the workers,
             * thus contradicting our purpose of using sequential mode.
             */
            ereport(ERROR, (errmsg(
                                "Shard name (%s) for table (%s) is too long and could "
                                "lead to deadlocks when executed in a transaction "
                                "block after a parallel query", longestShardName,
                                relationName),
                            errhint("Try re-running the transaction with "
                                    "\"SET LOCAL citus.multi_shard_modify_mode TO "
                                    "\'sequential\';\"")));
        }
        else
        {
            elog(DEBUG1, "the name of the shard (%s) for relation (%s) is too long, "
                         "switching to sequential and local execution mode to prevent "
                         "self deadlocks",
                 longestShardName, relationName);

            SetLocalMultiShardModifyModeToSequential();
            SetLocalExecutionStatus(LOCAL_EXECUTION_REQUIRED);

            return true;
        }
    }

    return false;
}


/*
 * SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong is a wrapper for new
 * partitions that will be distributed after attaching to a distributed partitioned table
 */
void
SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(Oid parentRelationId,
                                                          Oid partitionRelationId)
{
    SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(
        parentRelationId, get_rel_name(partitionRelationId));
}
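The errhint above spells out the manual workaround for the case where the automatic switch is not possible: force sequential mode yourself before running the DDL. A minimal sketch; the table names are hypothetical:

    BEGIN;
    SET LOCAL citus.multi_shard_modify_mode TO 'sequential';
    -- DDL that would otherwise generate over-length shard names in parallel mode
    ALTER TABLE very_long_named_distributed_table_2021
        RENAME TO an_even_longer_distributed_table_name_2021;
    COMMIT;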
@@ -510,6 +510,6 @@ ExecuteForeignKeyCreateCommand(const char *commandString, bool skip_validation)
                        "command \"%s\"", commandString)));
    }

-   ProcessUtilityParseTree(parseTree, commandString, PROCESS_UTILITY_TOPLEVEL,
+   ProcessUtilityParseTree(parseTree, commandString, PROCESS_UTILITY_QUERY,
                            NULL, None_Receiver, NULL);
}
@@ -412,6 +412,24 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio
    char replicationModel = DecideReplicationModel(distributionMethod,
                                                   viaDeprecatedAPI);


    /*
     * Due to dropping columns, the parent's distribution key may not match the
     * partition's distribution key. The input distributionColumn belongs to
     * the parent. That's why we override the distribution column of partitions
     * here. See issue #5123 for details.
     */
    if (PartitionTable(relationId))
    {
        Oid parentRelationId = PartitionParentOid(relationId);
        char *distributionColumnName =
            ColumnToColumnName(parentRelationId, nodeToString(distributionColumn));

        distributionColumn =
            FindColumnWithNameOnTargetRelation(parentRelationId, distributionColumnName,
                                               relationId);
    }

    /*
     * ColocationIdForNewTable assumes caller acquires lock on relationId. In our case,
     * our caller already acquired lock on relationId.
@@ -411,15 +411,16 @@ static char *
GenerateLongestShardPartitionIndexName(IndexStmt *createIndexStatement)
{
    Oid relationId = CreateIndexStmtGetRelationId(createIndexStatement);
-   char *longestPartitionName = LongestPartitionName(relationId);
-   if (longestPartitionName == NULL)
+   Oid longestNamePartitionId = PartitionWithLongestNameRelationId(relationId);
+   if (!OidIsValid(longestNamePartitionId))
    {
        /* no partitions have been created yet */
        return NULL;
    }

-   char *longestPartitionShardName = pstrdup(longestPartitionName);
-   ShardInterval *shardInterval = LoadShardIntervalWithLongestShardName(relationId);
+   char *longestPartitionShardName = get_rel_name(longestNamePartitionId);
+   ShardInterval *shardInterval = LoadShardIntervalWithLongestShardName(
+       longestNamePartitionId);
    AppendShardIdToName(&longestPartitionShardName, shardInterval->shardId);

    IndexStmt *createLongestShardIndexStmt = copyObject(createIndexStatement);
@@ -2244,7 +2244,7 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation,
    if (cacheEntry->replicationModel == REPLICATION_MODEL_2PC ||
        MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
    {
-       CoordinatedTransactionUse2PC();
+       CoordinatedTransactionShouldUse2PC();
    }

    /* define how tuples will be serialised */
@@ -109,6 +109,13 @@ PreprocessRenameStmt(Node *node, const char *renameCommand,
     */
    ErrorIfUnsupportedRenameStmt(renameStmt);

+   if (renameStmt->renameType == OBJECT_TABLE ||
+       renameStmt->renameType == OBJECT_FOREIGN_TABLE)
+   {
+       SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(tableRelationId,
+                                                                renameStmt->newname);
+   }

    DDLJob *ddlJob = palloc0(sizeof(DDLJob));
    ddlJob->targetRelationId = tableRelationId;
    ddlJob->concurrentIndexCmd = false;
@@ -26,6 +26,7 @@
#include "distributed/commands/utility_hook.h"
#include "distributed/deparser.h"
#include "distributed/deparse_shard_query.h"
+#include "distributed/distribution_column.h"
#include "distributed/listutils.h"
#include "distributed/coordinator_protocol.h"
#include "distributed/metadata_sync.h"

@@ -324,6 +325,9 @@ PostprocessCreateTableStmtPartitionOf(CreateStmt *createStatement, const
    char *parentRelationName = generate_qualified_relation_name(parentRelationId);
    bool viaDeprecatedAPI = false;

+   SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(parentRelationId,
+                                                             relationId);

    CreateDistributedTable(relationId, parentDistributionColumn,
                           parentDistributionMethod, ShardCount,
                           parentRelationName, viaDeprecatedAPI);

@@ -398,6 +402,9 @@ PostprocessAlterTableStmtAttachPartition(AlterTableStmt *alterTableStatement,
    char *parentRelationName = generate_qualified_relation_name(relationId);
    bool viaDeprecatedAPI = false;

+   SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(
+       relationId, partitionRelationId);

    CreateDistributedTable(partitionRelationId, distributionColumn,
                           distributionMethod, ShardCount,
                           parentRelationName, viaDeprecatedAPI);
@@ -910,6 +910,26 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
	 */
	if (ddlJob->startNewTransaction)
	{
		/*
		 * If cache is not populated, system catalog lookups will cause
		 * the xmin of current backend to change. Then the last phase
		 * of CREATE INDEX CONCURRENTLY, which is in a separate backend,
		 * will hang waiting for our backend and result in a deadlock.
		 *
		 * We populate the cache before starting the next transaction to
		 * avoid this. Most of the metadata has already been resolved in
		 * planning phase, we only need to lookup metadata needed for
		 * connection establishment.
		 */
		(void) CurrentDatabaseName();

		/*
		 * ConnParams (AuthInfo and PoolInfo) gets a snapshot, which
		 * will blocks the remote connections to localhost. Hence we warm up
		 * the cache here so that after we start a new transaction, the entries
		 * will already be in the hash table, hence we won't be holding any snapshots.
		 */
		WarmUpConnParamsHash();
		CommitTransactionCommand();
		StartTransactionCommand();
	}
@@ -21,6 +21,7 @@

/* stores the string representation of our node connection GUC */
char *NodeConninfo = "";
char *LocalHostName = "localhost";

/* represents a list of libpq parameter settings */
typedef struct ConnParamsInfo
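If the new LocalHostName variable is wired up as a citus.local_hostname setting (an assumption based on the naming convention; the GUC registration is not shown in this hunk), a deployment that should not use the literal "localhost" for self-connections could be configured roughly like this:

```sql
-- Assumed GUC name citus.local_hostname; hostname is a made-up example.
ALTER SYSTEM SET citus.local_hostname TO 'coordinator.internal.example.com';
SELECT pg_reload_conf();
```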
@@ -32,6 +32,7 @@
#include "distributed/shared_connection_stats.h"
#include "distributed/cancel_utils.h"
#include "distributed/remote_commands.h"
#include "distributed/time_constants.h"
#include "distributed/version_compat.h"
#include "distributed/worker_log_messages.h"
#include "mb/pg_wchar.h"

@@ -43,6 +44,7 @@

int NodeConnectionTimeout = 30000;
int MaxCachedConnectionsPerWorker = 1;
int MaxCachedConnectionLifetime = 10 * MS_PER_MINUTE;

HTAB *ConnectionHash = NULL;
HTAB *ConnParamsHash = NULL;

@@ -85,6 +87,7 @@ static WaitEventSet * WaitEventSetFromMultiConnectionStates(List *connections,
static void CloseNotReadyMultiConnectionStates(List *connectionStates);
static uint32 MultiConnectionStateEventMask(MultiConnectionPollState *connectionState);
static void CitusPQFinish(MultiConnection *connection);
static ConnParamsHashEntry * FindOrCreateConnParamsEntry(ConnectionHashKey *key);

/*
 * Initialize per-backend connection management infrastructure.
@@ -1127,9 +1130,62 @@ ConnectionHashCompare(const void *a, const void *b, Size keysize)
static void
StartConnectionEstablishment(MultiConnection *connection, ConnectionHashKey *key)
{
	bool found = false;
	static uint64 connectionId = 1;

	ConnParamsHashEntry *entry = FindOrCreateConnParamsEntry(key);

	strlcpy(connection->hostname, key->hostname, MAX_NODE_LENGTH);
	connection->port = key->port;
	strlcpy(connection->database, key->database, NAMEDATALEN);
	strlcpy(connection->user, key->user, NAMEDATALEN);

	connection->pgConn = PQconnectStartParams((const char **) entry->keywords,
	                                          (const char **) entry->values,
	                                          false);
	connection->connectionStart = GetCurrentTimestamp();
	connection->connectionId = connectionId++;

	/*
	 * To avoid issues with interrupts not getting caught all our connections
	 * are managed in a non-blocking manner. remote_commands.c provides
	 * wrappers emulating blocking behaviour.
	 */
	PQsetnonblocking(connection->pgConn, true);

	SetCitusNoticeReceiver(connection);
}


/*
 * WarmUpConnParamsHash warms up the ConnParamsHash by loading all the
 * conn params for active primary nodes.
 */
void
WarmUpConnParamsHash(void)
{
	List *workerNodeList = ActivePrimaryNodeList(AccessShareLock);
	WorkerNode *workerNode = NULL;
	foreach_ptr(workerNode, workerNodeList)
	{
		ConnectionHashKey key;
		strlcpy(key.hostname, workerNode->workerName, MAX_NODE_LENGTH);
		key.port = workerNode->workerPort;
		strlcpy(key.database, CurrentDatabaseName(), NAMEDATALEN);
		strlcpy(key.user, CurrentUserName(), NAMEDATALEN);
		FindOrCreateConnParamsEntry(&key);
	}
}


/*
 * FindOrCreateConnParamsEntry searches ConnParamsHash for the given key,
 * if it is not found, it is created.
 */
static ConnParamsHashEntry *
FindOrCreateConnParamsEntry(ConnectionHashKey *key)
{
	bool found = false;

	/* search our cache for precomputed connection settings */
	ConnParamsHashEntry *entry = hash_search(ConnParamsHash, key, HASH_ENTER, &found);
	if (!found || !entry->isValid)
@@ -1157,25 +1213,7 @@ StartConnectionEstablishment(MultiConnection *connection, ConnectionHashKey *key
		entry->isValid = true;
	}

	strlcpy(connection->hostname, key->hostname, MAX_NODE_LENGTH);
	connection->port = key->port;
	strlcpy(connection->database, key->database, NAMEDATALEN);
	strlcpy(connection->user, key->user, NAMEDATALEN);

	connection->pgConn = PQconnectStartParams((const char **) entry->keywords,
	                                          (const char **) entry->values,
	                                          false);
	connection->connectionStart = GetCurrentTimestamp();
	connection->connectionId = connectionId++;

	/*
	 * To avoid issues with interrupts not getting caught all our connections
	 * are managed in a non-blocking manner. remote_commands.c provides
	 * wrappers emulating blocking behaviour.
	 */
	PQsetnonblocking(connection->pgConn, true);

	SetCitusNoticeReceiver(connection);
	return entry;
}
@@ -1288,6 +1326,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit)
 * - Connection is forced to close at the end of transaction
 * - Connection is not in OK state
 * - A transaction is still in progress (usually because we are cancelling a distributed transaction)
 * - A connection reached its maximum lifetime
 */
static bool
ShouldShutdownConnection(MultiConnection *connection, const int cachedConnectionCount)

@@ -1303,7 +1342,10 @@ ShouldShutdownConnection(MultiConnection *connection, const int cachedConnection
		cachedConnectionCount >= MaxCachedConnectionsPerWorker ||
		connection->forceCloseAtTransactionEnd ||
		PQstatus(connection->pgConn) != CONNECTION_OK ||
		!RemoteTransactionIdle(connection);
		!RemoteTransactionIdle(connection) ||
		(MaxCachedConnectionLifetime >= 0 &&
		 TimestampDifferenceExceeds(connection->connectionStart, GetCurrentTimestamp(),
		                            MaxCachedConnectionLifetime));
}
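With this change, cached connections are recycled by age as well as by count. Assuming the MaxCachedConnectionLifetime variable above is exposed as a citus.max_cached_connection_lifetime GUC (the name is inferred from the variable and its 10 * MS_PER_MINUTE default, not shown in this hunk), it could be tuned per session like this:

```sql
-- Assumed GUC name; per the C default above, the implied default would be 10 minutes.
SET citus.max_cached_connection_lifetime = '5min';
```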
@@ -25,7 +25,11 @@
#include "utils/palloc.h"


#define MAX_PUT_COPY_DATA_BUFFER_SIZE (8 * 1024 * 1024)
/*
 * Setting that controls how many bytes of COPY data libpq is allowed to buffer
 * internally before we force a flush.
 */
int RemoteCopyFlushThreshold = 8 * 1024 * 1024;


/* GUC, determining whether statements sent to remote nodes are logged */

@@ -620,7 +624,7 @@ PutRemoteCopyData(MultiConnection *connection, const char *buffer, int nbytes)
	 */

	connection->copyBytesWrittenSinceLastFlush += nbytes;
	if (connection->copyBytesWrittenSinceLastFlush > MAX_PUT_COPY_DATA_BUFFER_SIZE)
	if (connection->copyBytesWrittenSinceLastFlush > RemoteCopyFlushThreshold)
	{
		connection->copyBytesWrittenSinceLastFlush = 0;
		return FinishConnectionIO(connection, allowInterrupts);
@@ -7055,9 +7055,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
	ExtractRangeTblExtraData(rte, NULL, &fragmentSchemaName, &fragmentTableName, NULL);

	/* use schema and table name from the remote alias */
	appendStringInfoString(buf,
	                       generate_fragment_name(fragmentSchemaName,
	                                              fragmentTableName));
	appendStringInfo(buf, "%s%s",
	                 only_marker(rte),
	                 generate_fragment_name(fragmentSchemaName,
	                                        fragmentTableName));
	break;
}

@@ -7057,9 +7057,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
	ExtractRangeTblExtraData(rte, NULL, &fragmentSchemaName, &fragmentTableName, NULL);

	/* use schema and table name from the remote alias */
	appendStringInfoString(buf,
	                       generate_fragment_name(fragmentSchemaName,
	                                              fragmentTableName));
	appendStringInfo(buf, "%s%s",
	                 only_marker(rte),
	                 generate_fragment_name(fragmentSchemaName,
	                                        fragmentTableName));
	break;
}

@@ -7115,9 +7115,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
	ExtractRangeTblExtraData(rte, NULL, &fragmentSchemaName, &fragmentTableName, NULL);

	/* use schema and table name from the remote alias */
	appendStringInfoString(buf,
	                       generate_fragment_name(fragmentSchemaName,
	                                              fragmentTableName));
	appendStringInfo(buf, "%s%s",
	                 only_marker(rte),
	                 generate_fragment_name(fragmentSchemaName,
	                                        fragmentTableName));
	break;
}
@ -174,6 +174,8 @@
|
|||
#include "utils/timestamp.h"
|
||||
|
||||
#define SLOW_START_DISABLED 0
|
||||
#define WAIT_EVENT_SET_INDEX_NOT_INITIALIZED -1
|
||||
#define WAIT_EVENT_SET_INDEX_FAILED -2
|
||||
|
||||
|
||||
/*
|
||||
|
@ -638,6 +640,10 @@ static int UsableConnectionCount(WorkerPool *workerPool);
|
|||
static long NextEventTimeout(DistributedExecution *execution);
|
||||
static WaitEventSet * BuildWaitEventSet(List *sessionList);
|
||||
static void RebuildWaitEventSetFlags(WaitEventSet *waitEventSet, List *sessionList);
|
||||
static int CitusAddWaitEventSetToSet(WaitEventSet *set, uint32 events, pgsocket fd,
|
||||
Latch *latch, void *user_data);
|
||||
static bool CitusModifyWaitEvent(WaitEventSet *set, int pos, uint32 events,
|
||||
Latch *latch);
|
||||
static TaskPlacementExecution * PopPlacementExecution(WorkerSession *session);
|
||||
static TaskPlacementExecution * PopAssignedPlacementExecution(WorkerSession *session);
|
||||
static TaskPlacementExecution * PopUnassignedPlacementExecution(WorkerPool *workerPool);
|
||||
|
@ -671,6 +677,8 @@ static void ExtractParametersForRemoteExecution(ParamListInfo paramListInfo,
|
|||
Oid **parameterTypes,
|
||||
const char ***parameterValues);
|
||||
static int GetEventSetSize(List *sessionList);
|
||||
static bool ProcessSessionsWithFailedWaitEventSetOperations(
|
||||
DistributedExecution *execution);
|
||||
static int RebuildWaitEventSet(DistributedExecution *execution);
|
||||
static void ProcessWaitEvents(DistributedExecution *execution, WaitEvent *events, int
|
||||
eventCount, bool *cancellationReceived);
|
||||
|
@ -1165,23 +1173,6 @@ DecideTransactionPropertiesForTaskList(RowModifyLevel modLevel, List *taskList,
|
|||
return xactProperties;
|
||||
}
|
||||
|
||||
if (GetCurrentLocalExecutionStatus() == LOCAL_EXECUTION_REQUIRED)
|
||||
{
|
||||
/*
|
||||
* In case localExecutionHappened, we force the executor to use 2PC.
|
||||
* The primary motivation is that at this point we're definitely expanding
|
||||
* the nodes participated in the transaction. And, by re-generating the
|
||||
* remote task lists during local query execution, we might prevent the adaptive
|
||||
* executor to kick-in 2PC (or even start coordinated transaction, that's why
|
||||
* we prefer adding this check here instead of
|
||||
* Activate2PCIfModifyingTransactionExpandsToNewNode()).
|
||||
*/
|
||||
xactProperties.errorOnAnyFailure = true;
|
||||
xactProperties.useRemoteTransactionBlocks = TRANSACTION_BLOCKS_REQUIRED;
|
||||
xactProperties.requires2PC = true;
|
||||
return xactProperties;
|
||||
}
|
||||
|
||||
if (DistributedExecutionRequiresRollback(taskList))
|
||||
{
|
||||
/* transaction blocks are required if the task list needs to roll back */
|
||||
|
@ -1240,7 +1231,7 @@ StartDistributedExecution(DistributedExecution *execution)
|
|||
|
||||
if (xactProperties->requires2PC)
|
||||
{
|
||||
CoordinatedTransactionUse2PC();
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2092,6 +2083,7 @@ FindOrCreateWorkerSession(WorkerPool *workerPool, MultiConnection *connection)
|
|||
session->connection = connection;
|
||||
session->workerPool = workerPool;
|
||||
session->commandsSent = 0;
|
||||
session->waitEventSetIndex = WAIT_EVENT_SET_INDEX_NOT_INITIALIZED;
|
||||
|
||||
dlist_init(&session->pendingTaskQueue);
|
||||
dlist_init(&session->readyTaskQueue);
|
||||
|
@ -2236,6 +2228,7 @@ RunDistributedExecution(DistributedExecution *execution)
|
|||
ManageWorkerPool(workerPool);
|
||||
}
|
||||
|
||||
bool skipWaitEvents = false;
|
||||
if (execution->remoteTaskList == NIL)
|
||||
{
|
||||
/*
|
||||
|
@ -2257,11 +2250,28 @@ RunDistributedExecution(DistributedExecution *execution)
|
|||
}
|
||||
eventSetSize = RebuildWaitEventSet(execution);
|
||||
events = palloc0(eventSetSize * sizeof(WaitEvent));
|
||||
|
||||
skipWaitEvents =
|
||||
ProcessSessionsWithFailedWaitEventSetOperations(execution);
|
||||
}
|
||||
else if (execution->waitFlagsChanged)
|
||||
{
|
||||
RebuildWaitEventSetFlags(execution->waitEventSet, execution->sessionList);
|
||||
execution->waitFlagsChanged = false;
|
||||
|
||||
skipWaitEvents =
|
||||
ProcessSessionsWithFailedWaitEventSetOperations(execution);
|
||||
}
|
||||
|
||||
if (skipWaitEvents)
|
||||
{
|
||||
/*
|
||||
* Some operation on the wait event set is failed, retry
|
||||
* as we already removed the problematic connections.
|
||||
*/
|
||||
execution->rebuildWaitEventSet = true;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/* wait for I/O events */
|
||||
|
@ -2310,6 +2320,51 @@ RunDistributedExecution(DistributedExecution *execution)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ProcessSessionsWithFailedEventSetOperations goes over the session list and
|
||||
* processes sessions with failed wait event set operations.
|
||||
*
|
||||
* Failed sessions are not going to generate any further events, so it is our
|
||||
* only chance to process the failure by calling into `ConnectionStateMachine`.
|
||||
*
|
||||
* The function returns true if any session failed.
|
||||
*/
|
||||
static bool
|
||||
ProcessSessionsWithFailedWaitEventSetOperations(DistributedExecution *execution)
|
||||
{
|
||||
bool foundFailedSession = false;
|
||||
WorkerSession *session = NULL;
|
||||
foreach_ptr(session, execution->sessionList)
|
||||
{
|
||||
if (session->waitEventSetIndex == WAIT_EVENT_SET_INDEX_FAILED)
|
||||
{
|
||||
/*
|
||||
* We can only lost only already connected connections,
|
||||
* others are regular failures.
|
||||
*/
|
||||
MultiConnection *connection = session->connection;
|
||||
if (connection->connectionState == MULTI_CONNECTION_CONNECTED)
|
||||
{
|
||||
connection->connectionState = MULTI_CONNECTION_LOST;
|
||||
}
|
||||
else
|
||||
{
|
||||
connection->connectionState = MULTI_CONNECTION_FAILED;
|
||||
}
|
||||
|
||||
|
||||
ConnectionStateMachine(session);
|
||||
|
||||
session->waitEventSetIndex = WAIT_EVENT_SET_INDEX_NOT_INITIALIZED;
|
||||
|
||||
foundFailedSession = true;
|
||||
}
|
||||
}
|
||||
|
||||
return foundFailedSession;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RebuildWaitEventSet updates the waitEventSet for the distributed execution.
|
||||
* This happens when the connection set for the distributed execution is changed,
|
||||
|
@@ -3197,7 +3252,7 @@ Activate2PCIfModifyingTransactionExpandsToNewNode(WorkerSession *session)
		 * just opened, which means we're now going to make modifications
		 * over multiple connections. Activate 2PC!
		 */
		CoordinatedTransactionUse2PC();
		CoordinatedTransactionShouldUse2PC();
	}
}
@@ -3875,6 +3930,7 @@ ReceiveResults(WorkerSession *session, bool storeRows)
		TupleDesc tupleDescriptor = tupleDest->tupleDescForQuery(tupleDest, queryIndex);
		if (tupleDescriptor == NULL)
		{
			PQclear(result);
			continue;
		}
@ -4693,18 +4749,79 @@ BuildWaitEventSet(List *sessionList)
|
|||
continue;
|
||||
}
|
||||
|
||||
int waitEventSetIndex = AddWaitEventToSet(waitEventSet, connection->waitFlags,
|
||||
sock, NULL, (void *) session);
|
||||
int waitEventSetIndex =
|
||||
CitusAddWaitEventSetToSet(waitEventSet, connection->waitFlags, sock,
|
||||
NULL, (void *) session);
|
||||
session->waitEventSetIndex = waitEventSetIndex;
|
||||
}
|
||||
|
||||
AddWaitEventToSet(waitEventSet, WL_POSTMASTER_DEATH, PGINVALID_SOCKET, NULL, NULL);
|
||||
AddWaitEventToSet(waitEventSet, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
|
||||
CitusAddWaitEventSetToSet(waitEventSet, WL_POSTMASTER_DEATH, PGINVALID_SOCKET, NULL,
|
||||
NULL);
|
||||
CitusAddWaitEventSetToSet(waitEventSet, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
|
||||
NULL);
|
||||
|
||||
return waitEventSet;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CitusAddWaitEventSetToSet is a wrapper around Postgres' AddWaitEventToSet().
|
||||
*
|
||||
* AddWaitEventToSet() may throw hard errors. For example, when the
|
||||
* underlying socket for a connection is closed by the remote server
|
||||
* and already reflected by the OS, however Citus hasn't had a chance
|
||||
* to get this information. In that case, if replication factor is >1,
|
||||
* Citus can failover to other nodes for executing the query. Even if
|
||||
* replication factor = 1, Citus can give much nicer errors.
|
||||
*
|
||||
* So CitusAddWaitEventSetToSet simply puts ModifyWaitEvent into a
|
||||
* PG_TRY/PG_CATCH block in order to catch any hard errors, and
|
||||
* returns this information to the caller.
|
||||
*/
|
||||
static int
|
||||
CitusAddWaitEventSetToSet(WaitEventSet *set, uint32 events, pgsocket fd,
|
||||
Latch *latch, void *user_data)
|
||||
{
|
||||
volatile int waitEventSetIndex = WAIT_EVENT_SET_INDEX_NOT_INITIALIZED;
|
||||
MemoryContext savedContext = CurrentMemoryContext;
|
||||
|
||||
PG_TRY();
|
||||
{
|
||||
waitEventSetIndex =
|
||||
AddWaitEventToSet(set, events, fd, latch, (void *) user_data);
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
/*
|
||||
* We might be in an arbitrary memory context when the
|
||||
* error is thrown and we should get back to one we had
|
||||
* at PG_TRY() time, especially because we are not
|
||||
* re-throwing the error.
|
||||
*/
|
||||
MemoryContextSwitchTo(savedContext);
|
||||
|
||||
FlushErrorState();
|
||||
|
||||
if (user_data != NULL)
|
||||
{
|
||||
WorkerSession *workerSession = (WorkerSession *) user_data;
|
||||
|
||||
ereport(DEBUG1, (errcode(ERRCODE_CONNECTION_FAILURE),
|
||||
errmsg("Adding wait event for node %s:%d failed. "
|
||||
"The socket was: %d",
|
||||
workerSession->workerPool->nodeName,
|
||||
workerSession->workerPool->nodePort, fd)));
|
||||
}
|
||||
|
||||
/* let the callers know about the failure */
|
||||
waitEventSetIndex = WAIT_EVENT_SET_INDEX_FAILED;
|
||||
}
|
||||
PG_END_TRY();
|
||||
|
||||
return waitEventSetIndex;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetEventSetSize returns the event set size for a list of sessions.
|
||||
*/
|
||||
|
@ -4748,11 +4865,68 @@ RebuildWaitEventSetFlags(WaitEventSet *waitEventSet, List *sessionList)
|
|||
continue;
|
||||
}
|
||||
|
||||
ModifyWaitEvent(waitEventSet, waitEventSetIndex, connection->waitFlags, NULL);
|
||||
bool success =
|
||||
CitusModifyWaitEvent(waitEventSet, waitEventSetIndex,
|
||||
connection->waitFlags, NULL);
|
||||
if (!success)
|
||||
{
|
||||
ereport(DEBUG1, (errcode(ERRCODE_CONNECTION_FAILURE),
|
||||
errmsg("Modifying wait event for node %s:%d failed. "
|
||||
"The wait event index was: %d",
|
||||
connection->hostname, connection->port,
|
||||
waitEventSetIndex)));
|
||||
|
||||
session->waitEventSetIndex = WAIT_EVENT_SET_INDEX_FAILED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CitusModifyWaitEvent is a wrapper around Postgres' ModifyWaitEvent().
|
||||
*
|
||||
* ModifyWaitEvent may throw hard errors. For example, when the underlying
|
||||
* socket for a connection is closed by the remote server and already
|
||||
* reflected by the OS, however Citus hasn't had a chance to get this
|
||||
* information. In that case, if repliction factor is >1, Citus can
|
||||
* failover to other nodes for executing the query. Even if replication
|
||||
* factor = 1, Citus can give much nicer errors.
|
||||
*
|
||||
* So CitusModifyWaitEvent simply puts ModifyWaitEvent into a PG_TRY/PG_CATCH
|
||||
* block in order to catch any hard errors, and returns this information to the
|
||||
* caller.
|
||||
*/
|
||||
static bool
|
||||
CitusModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
|
||||
{
|
||||
volatile bool success = true;
|
||||
MemoryContext savedContext = CurrentMemoryContext;
|
||||
|
||||
PG_TRY();
|
||||
{
|
||||
ModifyWaitEvent(set, pos, events, latch);
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
/*
|
||||
* We might be in an arbitrary memory context when the
|
||||
* error is thrown and we should get back to one we had
|
||||
* at PG_TRY() time, especially because we are not
|
||||
* re-throwing the error.
|
||||
*/
|
||||
MemoryContextSwitchTo(savedContext);
|
||||
|
||||
FlushErrorState();
|
||||
|
||||
/* let the callers know about the failure */
|
||||
success = false;
|
||||
}
|
||||
PG_END_TRY();
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SetLocalForceMaxQueryParallelization is simply a C interface for setting
|
||||
* the following:
|
||||
|
|
|
@@ -300,7 +300,8 @@ CitusBeginReadOnlyScan(CustomScanState *node, EState *estate, int eflags)
		 * The plan will be cached across executions when originalDistributedPlan
		 * represents a prepared statement.
		 */
		CacheLocalPlanForShardQuery(task, originalDistributedPlan);
		CacheLocalPlanForShardQuery(task, originalDistributedPlan,
		                            estate->es_param_list_info);
	}
}
@ -342,9 +343,12 @@ CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags)
|
|||
/*
|
||||
* At this point, we're about to do the shard pruning for fast-path queries.
|
||||
* Given that pruning is deferred always for INSERTs, we get here
|
||||
* !EnableFastPathRouterPlanner as well.
|
||||
* !EnableFastPathRouterPlanner as well. Given that INSERT statements with
|
||||
* CTEs/sublinks etc are not eligible for fast-path router plan, we get here
|
||||
* jobQuery->commandType == CMD_INSERT as well.
|
||||
*/
|
||||
Assert(currentPlan->fastPathRouterPlan || !EnableFastPathRouterPlanner);
|
||||
Assert(currentPlan->fastPathRouterPlan || !EnableFastPathRouterPlanner ||
|
||||
jobQuery->commandType == CMD_INSERT);
|
||||
|
||||
/*
|
||||
* We can only now decide which shard to use, so we need to build a new task
|
||||
|
@ -406,7 +410,8 @@ CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags)
|
|||
* The plan will be cached across executions when originalDistributedPlan
|
||||
* represents a prepared statement.
|
||||
*/
|
||||
CacheLocalPlanForShardQuery(task, originalDistributedPlan);
|
||||
CacheLocalPlanForShardQuery(task, originalDistributedPlan,
|
||||
estate->es_param_list_info);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -125,9 +125,6 @@ static void LogLocalCommand(Task *task);
|
|||
static uint64 LocallyPlanAndExecuteMultipleQueries(List *queryStrings,
|
||||
TupleDestination *tupleDest,
|
||||
Task *task);
|
||||
static void ExtractParametersForLocalExecution(ParamListInfo paramListInfo,
|
||||
Oid **parameterTypes,
|
||||
const char ***parameterValues);
|
||||
static void ExecuteUdfTaskQuery(Query *localUdfCommandQuery);
|
||||
static void EnsureTransitionPossible(LocalExecutionStatus from,
|
||||
LocalExecutionStatus to);
|
||||
|
@ -209,6 +206,19 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
Oid *parameterTypes = NULL;
|
||||
uint64 totalRowsProcessed = 0;
|
||||
|
||||
/*
|
||||
* Even if we are executing local tasks, we still enable
|
||||
* coordinated transaction. This is because
|
||||
* (a) we might be in a transaction, and the next commands may
|
||||
* require coordinated transaction
|
||||
* (b) we might be executing some tasks locally and the others
|
||||
* via remote execution
|
||||
*
|
||||
* Also, there is no harm enabling coordinated transaction even if
|
||||
* we only deal with local tasks in the transaction.
|
||||
*/
|
||||
UseCoordinatedTransaction();
|
||||
|
||||
if (paramListInfo != NULL)
|
||||
{
|
||||
/* not used anywhere, so declare here */
|
||||
|
@ -236,6 +246,17 @@ ExecuteLocalTaskListExtended(List *taskList,
|
|||
{
|
||||
SetLocalExecutionStatus(LOCAL_EXECUTION_REQUIRED);
|
||||
}
|
||||
|
||||
if (!ReadOnlyTask(task->taskType))
|
||||
{
|
||||
/*
|
||||
* Any modification on the local execution should enable 2PC. If remote
|
||||
* queries are also ReadOnly, our 2PC logic is smart enough to skip sending
|
||||
* PREPARE to those connections.
|
||||
*/
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
}
|
||||
|
||||
LogLocalCommand(task);
|
||||
|
||||
if (isUtilityCommand)
|
||||
|
@ -362,7 +383,7 @@ LocallyPlanAndExecuteMultipleQueries(List *queryStrings, TupleDestination *tuple
|
|||
* value arrays. It does not change the oid of custom types, because the
|
||||
* query will be run locally.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
ExtractParametersForLocalExecution(ParamListInfo paramListInfo, Oid **parameterTypes,
|
||||
const char ***parameterValues)
|
||||
{
|
||||
|
@ -406,7 +427,7 @@ ExecuteUtilityCommand(const char *taskQueryCommand)
|
|||
* process utility.
|
||||
*/
|
||||
ProcessUtilityParseTree(taskRawParseTree, taskQueryCommand,
|
||||
PROCESS_UTILITY_TOPLEVEL, NULL, None_Receiver,
|
||||
PROCESS_UTILITY_QUERY, NULL, None_Receiver,
|
||||
NULL);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -156,7 +156,6 @@ static void ApplyAddToDependencyList(ObjectAddressCollector *collector,
|
|||
static List * ExpandCitusSupportedTypes(ObjectAddressCollector *collector,
|
||||
ObjectAddress target);
|
||||
static ViewDependencyNode * BuildViewDependencyGraph(Oid relationId, HTAB *nodeMap);
|
||||
static Oid GetDependingView(Form_pg_depend pg_depend);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -1204,18 +1203,31 @@ GetDependingView(Form_pg_depend pg_depend)
|
|||
true, NULL, 1, rkey);
|
||||
|
||||
HeapTuple rewriteTup = systable_getnext(rscan);
|
||||
if (!HeapTupleIsValid(rewriteTup))
|
||||
{
|
||||
/*
|
||||
* This function already verified that objid's classid is
|
||||
* RewriteRelationId, so it should exists. But be on the
|
||||
* safe side.
|
||||
*/
|
||||
ereport(ERROR, (errmsg("catalog lookup failed for view %u",
|
||||
pg_depend->objid)));
|
||||
}
|
||||
|
||||
Form_pg_rewrite pg_rewrite = (Form_pg_rewrite) GETSTRUCT(rewriteTup);
|
||||
|
||||
bool isView = get_rel_relkind(pg_rewrite->ev_class) == RELKIND_VIEW;
|
||||
bool isMatView = get_rel_relkind(pg_rewrite->ev_class) == RELKIND_MATVIEW;
|
||||
bool isDifferentThanRef = pg_rewrite->ev_class != pg_depend->refobjid;
|
||||
|
||||
Oid dependingView = InvalidOid;
|
||||
if ((isView || isMatView) && isDifferentThanRef)
|
||||
{
|
||||
dependingView = pg_rewrite->ev_class;
|
||||
}
|
||||
|
||||
systable_endscan(rscan);
|
||||
relation_close(rewriteRel, AccessShareLock);
|
||||
|
||||
if ((isView || isMatView) && isDifferentThanRef)
|
||||
{
|
||||
return pg_rewrite->ev_class;
|
||||
}
|
||||
return InvalidOid;
|
||||
return dependingView;
|
||||
}
|
||||
|
|
|
@ -373,3 +373,56 @@ GetDistributedObjectAddressList(void)
|
|||
|
||||
return objectAddressList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* UpdateDistributedObjectColocationId gets an old and a new colocationId
|
||||
* and updates the colocationId of all tuples in citus.pg_dist_object which
|
||||
* have the old colocationId to the new colocationId.
|
||||
*/
|
||||
void
|
||||
UpdateDistributedObjectColocationId(uint32 oldColocationId,
|
||||
uint32 newColocationId)
|
||||
{
|
||||
const bool indexOK = false;
|
||||
ScanKeyData scanKey[1];
|
||||
Relation pgDistObjectRel = table_open(DistObjectRelationId(),
|
||||
RowExclusiveLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistObjectRel);
|
||||
|
||||
/* scan pg_dist_object for colocationId equal to old colocationId */
|
||||
ScanKeyInit(&scanKey[0], Anum_pg_dist_object_colocationid,
|
||||
BTEqualStrategyNumber,
|
||||
F_INT4EQ, UInt32GetDatum(oldColocationId));
|
||||
|
||||
SysScanDesc scanDescriptor = systable_beginscan(pgDistObjectRel,
|
||||
InvalidOid,
|
||||
indexOK,
|
||||
NULL, 1, scanKey);
|
||||
HeapTuple heapTuple;
|
||||
while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
|
||||
{
|
||||
Datum values[Natts_pg_dist_object];
|
||||
bool isnull[Natts_pg_dist_object];
|
||||
bool replace[Natts_pg_dist_object];
|
||||
|
||||
memset(replace, 0, sizeof(replace));
|
||||
|
||||
replace[Anum_pg_dist_object_colocationid - 1] = true;
|
||||
|
||||
/* update the colocationId to the new one */
|
||||
values[Anum_pg_dist_object_colocationid - 1] = UInt32GetDatum(newColocationId);
|
||||
|
||||
isnull[Anum_pg_dist_object_colocationid - 1] = false;
|
||||
|
||||
heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull,
|
||||
replace);
|
||||
|
||||
CatalogTupleUpdate(pgDistObjectRel, &heapTuple->t_self, heapTuple);
|
||||
CitusInvalidateRelcacheByRelid(DistObjectRelationId());
|
||||
}
|
||||
|
||||
systable_endscan(scanDescriptor);
|
||||
table_close(pgDistObjectRel, NoLock);
|
||||
CommandCounterIncrement();
|
||||
}
|
||||
|
|
|
@ -79,14 +79,24 @@ static bool DistributedTableSizeOnWorker(WorkerNode *workerNode, Oid relationId,
|
|||
char *sizeQuery, bool failOnError,
|
||||
uint64 *tableSize);
|
||||
static List * ShardIntervalsOnWorkerGroup(WorkerNode *workerNode, Oid relationId);
|
||||
static char * GenerateShardNameAndSizeQueryForShardList(List *shardIntervalList);
|
||||
static char * GenerateAllShardNameAndSizeQueryForNode(WorkerNode *workerNode);
|
||||
static List * GenerateShardSizesQueryList(List *workerNodeList);
|
||||
static char * GenerateShardStatisticsQueryForShardList(List *shardIntervalList, bool
|
||||
useShardMinMaxQuery);
|
||||
static char * GenerateAllShardStatisticsQueryForNode(WorkerNode *workerNode,
|
||||
List *citusTableIds, bool
|
||||
useShardMinMaxQuery);
|
||||
static List * GenerateShardStatisticsQueryList(List *workerNodeList, List *citusTableIds,
|
||||
bool useShardMinMaxQuery);
|
||||
static void ErrorIfNotSuitableToGetSize(Oid relationId);
|
||||
static List * OpenConnectionToNodes(List *workerNodeList);
|
||||
static void ReceiveShardNameAndSizeResults(List *connectionList,
|
||||
Tuplestorestate *tupleStore,
|
||||
TupleDesc tupleDescriptor);
|
||||
static void AppendShardSizeMinMaxQuery(StringInfo selectQuery, uint64 shardId,
|
||||
ShardInterval *
|
||||
shardInterval, char *shardName,
|
||||
char *quotedShardName);
|
||||
static void AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval,
|
||||
char *quotedShardName);
|
||||
|
||||
/* exports for SQL callable functions */
|
||||
PG_FUNCTION_INFO_V1(citus_table_size);
|
||||
|
@ -102,25 +112,16 @@ citus_shard_sizes(PG_FUNCTION_ARGS)
|
|||
{
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
List *workerNodeList = ActivePrimaryNodeList(NoLock);
|
||||
List *allCitusTableIds = AllCitusTableIds();
|
||||
|
||||
List *shardSizesQueryList = GenerateShardSizesQueryList(workerNodeList);
|
||||
/* we don't need a distributed transaction here */
|
||||
bool useDistributedTransaction = false;
|
||||
|
||||
List *connectionList = OpenConnectionToNodes(workerNodeList);
|
||||
FinishConnectionListEstablishment(connectionList);
|
||||
|
||||
|
||||
/* send commands in parallel */
|
||||
for (int i = 0; i < list_length(connectionList); i++)
|
||||
{
|
||||
MultiConnection *connection = (MultiConnection *) list_nth(connectionList, i);
|
||||
char *shardSizesQuery = (char *) list_nth(shardSizesQueryList, i);
|
||||
int querySent = SendRemoteCommand(connection, shardSizesQuery);
|
||||
if (querySent == 0)
|
||||
{
|
||||
ReportConnectionError(connection, WARNING);
|
||||
}
|
||||
}
|
||||
/* we only want the shard sizes here so useShardMinMaxQuery parameter is false */
|
||||
bool useShardMinMaxQuery = false;
|
||||
List *connectionList = SendShardStatisticsQueriesInParallel(allCitusTableIds,
|
||||
useDistributedTransaction,
|
||||
useShardMinMaxQuery);
|
||||
|
||||
TupleDesc tupleDescriptor = NULL;
|
||||
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
|
||||
|
@ -225,6 +226,59 @@ citus_relation_size(PG_FUNCTION_ARGS)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* SendShardStatisticsQueriesInParallel generates query lists for obtaining shard
|
||||
* statistics and then sends the commands in parallel by opening connections
|
||||
* to available nodes. It returns the connection list.
|
||||
*/
|
||||
List *
|
||||
SendShardStatisticsQueriesInParallel(List *citusTableIds, bool useDistributedTransaction,
|
||||
bool
|
||||
useShardMinMaxQuery)
|
||||
{
|
||||
List *workerNodeList = ActivePrimaryNodeList(NoLock);
|
||||
|
||||
List *shardSizesQueryList = GenerateShardStatisticsQueryList(workerNodeList,
|
||||
citusTableIds,
|
||||
useShardMinMaxQuery);
|
||||
|
||||
List *connectionList = OpenConnectionToNodes(workerNodeList);
|
||||
FinishConnectionListEstablishment(connectionList);
|
||||
|
||||
if (useDistributedTransaction)
|
||||
{
|
||||
/*
|
||||
* For now, in the case we want to include shard min and max values, we also
|
||||
* want to update the entries in pg_dist_placement and pg_dist_shard with the
|
||||
* latest statistics. In order to detect distributed deadlocks, we assign a
|
||||
* distributed transaction ID to the current transaction
|
||||
*/
|
||||
UseCoordinatedTransaction();
|
||||
}
|
||||
|
||||
/* send commands in parallel */
|
||||
for (int i = 0; i < list_length(connectionList); i++)
|
||||
{
|
||||
MultiConnection *connection = (MultiConnection *) list_nth(connectionList, i);
|
||||
char *shardSizesQuery = (char *) list_nth(shardSizesQueryList, i);
|
||||
|
||||
if (useDistributedTransaction)
|
||||
{
|
||||
/* run the size query in a distributed transaction */
|
||||
RemoteTransactionBeginIfNecessary(connection);
|
||||
}
|
||||
|
||||
int querySent = SendRemoteCommand(connection, shardSizesQuery);
|
||||
|
||||
if (querySent == 0)
|
||||
{
|
||||
ReportConnectionError(connection, WARNING);
|
||||
}
|
||||
}
|
||||
return connectionList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* OpenConnectionToNodes opens a single connection per node
|
||||
* for the given workerNodeList.
|
||||
|
@ -250,20 +304,25 @@ OpenConnectionToNodes(List *workerNodeList)
|
|||
|
||||
|
||||
/*
|
||||
* GenerateShardSizesQueryList generates a query per node that
|
||||
* will return all shard_name, shard_size pairs from the node.
|
||||
* GenerateShardStatisticsQueryList generates a query per node that will return:
|
||||
* - all shard_name, shard_size pairs from the node (if includeShardMinMax is false)
|
||||
* - all shard_id, shard_minvalue, shard_maxvalue, shard_size quartuples from the node (if true)
|
||||
*/
|
||||
static List *
|
||||
GenerateShardSizesQueryList(List *workerNodeList)
|
||||
GenerateShardStatisticsQueryList(List *workerNodeList, List *citusTableIds, bool
|
||||
useShardMinMaxQuery)
|
||||
{
|
||||
List *shardSizesQueryList = NIL;
|
||||
List *shardStatisticsQueryList = NIL;
|
||||
WorkerNode *workerNode = NULL;
|
||||
foreach_ptr(workerNode, workerNodeList)
|
||||
{
|
||||
char *shardSizesQuery = GenerateAllShardNameAndSizeQueryForNode(workerNode);
|
||||
shardSizesQueryList = lappend(shardSizesQueryList, shardSizesQuery);
|
||||
char *shardStatisticsQuery = GenerateAllShardStatisticsQueryForNode(workerNode,
|
||||
citusTableIds,
|
||||
useShardMinMaxQuery);
|
||||
shardStatisticsQueryList = lappend(shardStatisticsQueryList,
|
||||
shardStatisticsQuery);
|
||||
}
|
||||
return shardSizesQueryList;
|
||||
return shardStatisticsQueryList;
|
||||
}
|
||||
|
||||
|
||||
|
@ -572,37 +631,50 @@ GenerateSizeQueryOnMultiplePlacements(List *shardIntervalList, char *sizeQuery)
|
|||
|
||||
|
||||
/*
|
||||
* GenerateAllShardNameAndSizeQueryForNode generates a query that returns all
|
||||
* shard_name, shard_size pairs for the given node.
|
||||
* GenerateAllShardStatisticsQueryForNode generates a query that returns:
|
||||
* - all shard_name, shard_size pairs for the given node (if useShardMinMaxQuery is false)
|
||||
* - all shard_id, shard_minvalue, shard_maxvalue, shard_size quartuples (if true)
|
||||
*/
|
||||
static char *
|
||||
GenerateAllShardNameAndSizeQueryForNode(WorkerNode *workerNode)
|
||||
GenerateAllShardStatisticsQueryForNode(WorkerNode *workerNode, List *citusTableIds, bool
|
||||
useShardMinMaxQuery)
|
||||
{
|
||||
List *allCitusTableIds = AllCitusTableIds();
|
||||
|
||||
StringInfo allShardNameAndSizeQuery = makeStringInfo();
|
||||
StringInfo allShardStatisticsQuery = makeStringInfo();
|
||||
|
||||
Oid relationId = InvalidOid;
|
||||
foreach_oid(relationId, allCitusTableIds)
|
||||
foreach_oid(relationId, citusTableIds)
|
||||
{
|
||||
List *shardIntervalsOnNode = ShardIntervalsOnWorkerGroup(workerNode, relationId);
|
||||
char *shardNameAndSizeQuery =
|
||||
GenerateShardNameAndSizeQueryForShardList(shardIntervalsOnNode);
|
||||
appendStringInfoString(allShardNameAndSizeQuery, shardNameAndSizeQuery);
|
||||
char *shardStatisticsQuery =
|
||||
GenerateShardStatisticsQueryForShardList(shardIntervalsOnNode,
|
||||
useShardMinMaxQuery);
|
||||
appendStringInfoString(allShardStatisticsQuery, shardStatisticsQuery);
|
||||
}
|
||||
|
||||
/* Add a dummy entry so that UNION ALL doesn't complain */
|
||||
appendStringInfo(allShardNameAndSizeQuery, "SELECT NULL::text, 0::bigint;");
|
||||
return allShardNameAndSizeQuery->data;
|
||||
if (useShardMinMaxQuery)
|
||||
{
|
||||
/* 0 for shard_id, NULL for min, NULL for text, 0 for shard_size */
|
||||
appendStringInfo(allShardStatisticsQuery,
|
||||
"SELECT 0::bigint, NULL::text, NULL::text, 0::bigint;");
|
||||
}
|
||||
else
|
||||
{
|
||||
/* NULL for shard_name, 0 for shard_size */
|
||||
appendStringInfo(allShardStatisticsQuery, "SELECT NULL::text, 0::bigint;");
|
||||
}
|
||||
return allShardStatisticsQuery->data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GenerateShardNameAndSizeQueryForShardList generates a SELECT shard_name - shard_size query to get
|
||||
* size of multiple tables.
|
||||
* GenerateShardStatisticsQueryForShardList generates one of the two types of queries:
|
||||
* - SELECT shard_name - shard_size (if useShardMinMaxQuery is false)
|
||||
* - SELECT shard_id, shard_minvalue, shard_maxvalue, shard_size (if true)
|
||||
*/
|
||||
static char *
|
||||
GenerateShardNameAndSizeQueryForShardList(List *shardIntervalList)
|
||||
GenerateShardStatisticsQueryForShardList(List *shardIntervalList, bool
|
||||
useShardMinMaxQuery)
|
||||
{
|
||||
StringInfo selectQuery = makeStringInfo();
|
||||
|
||||
|
@ -618,8 +690,15 @@ GenerateShardNameAndSizeQueryForShardList(List *shardIntervalList)
|
|||
char *shardQualifiedName = quote_qualified_identifier(schemaName, shardName);
|
||||
char *quotedShardName = quote_literal_cstr(shardQualifiedName);
|
||||
|
||||
appendStringInfo(selectQuery, "SELECT %s AS shard_name, ", quotedShardName);
|
||||
appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName);
|
||||
if (useShardMinMaxQuery)
|
||||
{
|
||||
AppendShardSizeMinMaxQuery(selectQuery, shardId, shardInterval, shardName,
|
||||
quotedShardName);
|
||||
}
|
||||
else
|
||||
{
|
||||
AppendShardSizeQuery(selectQuery, shardInterval, quotedShardName);
|
||||
}
|
||||
appendStringInfo(selectQuery, " UNION ALL ");
|
||||
}
|
||||
|
||||
|
@ -627,6 +706,54 @@ GenerateShardNameAndSizeQueryForShardList(List *shardIntervalList)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendShardSizeMinMaxQuery appends a query in the following form to selectQuery
|
||||
* SELECT shard_id, shard_minvalue, shard_maxvalue, shard_size
|
||||
*/
|
||||
static void
|
||||
AppendShardSizeMinMaxQuery(StringInfo selectQuery, uint64 shardId,
|
||||
ShardInterval *shardInterval, char *shardName,
|
||||
char *quotedShardName)
|
||||
{
|
||||
if (IsCitusTableType(shardInterval->relationId, APPEND_DISTRIBUTED))
|
||||
{
|
||||
/* fill in the partition column name */
|
||||
const uint32 unusedTableId = 1;
|
||||
Var *partitionColumn = PartitionColumn(shardInterval->relationId,
|
||||
unusedTableId);
|
||||
char *partitionColumnName = get_attname(shardInterval->relationId,
|
||||
partitionColumn->varattno, false);
|
||||
appendStringInfo(selectQuery,
|
||||
"SELECT " UINT64_FORMAT
|
||||
" AS shard_id, min(%s)::text AS shard_minvalue, max(%s)::text AS shard_maxvalue, pg_relation_size(%s) AS shard_size FROM %s ",
|
||||
shardId, partitionColumnName,
|
||||
partitionColumnName,
|
||||
quotedShardName, shardName);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we don't need to update min/max for non-append distributed tables because they don't change */
|
||||
appendStringInfo(selectQuery,
|
||||
"SELECT " UINT64_FORMAT
|
||||
" AS shard_id, NULL::text AS shard_minvalue, NULL::text AS shard_maxvalue, pg_relation_size(%s) AS shard_size ",
|
||||
shardId, quotedShardName);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendShardSizeQuery appends a query in the following form to selectQuery
|
||||
* SELECT shard_name, shard_size
|
||||
*/
|
||||
static void
|
||||
AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval,
|
||||
char *quotedShardName)
|
||||
{
|
||||
appendStringInfo(selectQuery, "SELECT %s AS shard_name, ", quotedShardName);
|
||||
appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfNotSuitableToGetSize determines whether the table is suitable to find
|
||||
* its' size with internal functions.
|
||||
|
@@ -924,6 +1051,26 @@ ShardLength(uint64 shardId)
}


/*
 * NodeGroupHasLivePlacements returns true if there is any placement
 * on the given node group which is not a SHARD_STATE_TO_DELETE placement.
 */
bool
NodeGroupHasLivePlacements(int32 groupId)
{
	List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId);
	GroupShardPlacement *placement = NULL;
	foreach_ptr(placement, shardPlacements)
	{
		if (placement->shardState != SHARD_STATE_TO_DELETE)
		{
			return true;
		}
	}
	return false;
}


/*
 * NodeGroupHasShardPlacements returns whether any active shards are placed on the group
 */
@ -112,7 +112,7 @@ static bool UnsetMetadataSyncedForAll(void);
|
|||
static void ErrorIfCoordinatorMetadataSetFalse(WorkerNode *workerNode, Datum value,
|
||||
char *field);
|
||||
static WorkerNode * SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards);
|
||||
|
||||
static void RemoveOldShardPlacementForNodeGroup(int groupId);
|
||||
|
||||
/* declarations for dynamic loading */
|
||||
PG_FUNCTION_INFO_V1(citus_set_coordinator_host);
|
||||
|
@ -1291,9 +1291,7 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
|
|||
*/
|
||||
DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId);
|
||||
}
|
||||
bool onlyConsiderActivePlacements = false;
|
||||
if (NodeGroupHasShardPlacements(workerNode->groupId,
|
||||
onlyConsiderActivePlacements))
|
||||
if (NodeGroupHasLivePlacements(workerNode->groupId))
|
||||
{
|
||||
if (ClusterHasReferenceTable())
|
||||
{
|
||||
|
@ -1320,6 +1318,8 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
|
|||
|
||||
DeleteNodeRow(workerNode->workerName, nodePort);
|
||||
|
||||
RemoveOldShardPlacementForNodeGroup(workerNode->groupId);
|
||||
|
||||
char *nodeDeleteCommand = NodeDeleteCommand(workerNode->nodeId);
|
||||
|
||||
/* make sure we don't have any lingering session lifespan connections */
|
||||
|
@ -1329,6 +1329,29 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* RemoveOldShardPlacementForNodeGroup removes all old shard placements
|
||||
* for the given node group from pg_dist_placement.
|
||||
*/
|
||||
static void
|
||||
RemoveOldShardPlacementForNodeGroup(int groupId)
|
||||
{
|
||||
/*
|
||||
* Prevent concurrent deferred drop
|
||||
*/
|
||||
LockPlacementCleanup();
|
||||
List *shardPlacementsOnNode = AllShardPlacementsOnNodeGroup(groupId);
|
||||
GroupShardPlacement *placement = NULL;
|
||||
foreach_ptr(placement, shardPlacementsOnNode)
|
||||
{
|
||||
if (placement->shardState == SHARD_STATE_TO_DELETE)
|
||||
{
|
||||
DeleteShardPlacementRow(placement->placementId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CanRemoveReferenceTablePlacements returns true if active primary
|
||||
* node count is more than 1, which means that even if we remove a node
|
||||
|
@ -1384,16 +1407,34 @@ AddNodeMetadata(char *nodeName, int32 nodePort,
|
|||
*nodeAlreadyExists = false;
|
||||
|
||||
/*
|
||||
* Take an exclusive lock on pg_dist_node to serialize node changes.
|
||||
* Prevent / wait for concurrent modification before checking whether
|
||||
* the worker already exists in pg_dist_node.
|
||||
*/
|
||||
LockRelationOid(DistNodeRelationId(), RowShareLock);
|
||||
|
||||
WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
|
||||
if (workerNode != NULL)
|
||||
{
|
||||
/* return early without holding locks when the node already exists */
|
||||
*nodeAlreadyExists = true;
|
||||
|
||||
return workerNode->nodeId;
|
||||
}
|
||||
|
||||
/*
|
||||
* We are going to change pg_dist_node, prevent any concurrent reads that
|
||||
* are not tolerant to concurrent node addition by taking an exclusive
|
||||
* lock (conflicts with all but AccessShareLock).
|
||||
*
|
||||
* We may want to relax or have more fine-grained locking in the future
|
||||
* to allow users to add multiple nodes concurrently.
|
||||
*/
|
||||
LockRelationOid(DistNodeRelationId(), ExclusiveLock);
|
||||
|
||||
WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
|
||||
/* recheck in case 2 node additions pass the first check concurrently */
|
||||
workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
|
||||
if (workerNode != NULL)
|
||||
{
|
||||
/* fill return data and return */
|
||||
*nodeAlreadyExists = true;
|
||||
|
||||
return workerNode->nodeId;
|
||||
|
@ -1800,7 +1841,7 @@ InsertPlaceholderCoordinatorRecord(void)
|
|||
bool nodeAlreadyExists = false;
|
||||
|
||||
/* as long as there is a single node, localhost should be ok */
|
||||
AddNodeMetadata("localhost", PostPortNumber, &nodeMetadata, &nodeAlreadyExists);
|
||||
AddNodeMetadata(LocalHostName, PostPortNumber, &nodeMetadata, &nodeAlreadyExists);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@@ -332,7 +332,7 @@ DropShards(Oid relationId, char *schemaName, char *relationName,
	 */
	if (MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
	{
		CoordinatedTransactionUse2PC();
		CoordinatedTransactionShouldUse2PC();
	}

	List *dropTaskList = DropTaskList(relationId, schemaName, relationName,
@@ -85,6 +85,7 @@ PG_FUNCTION_INFO_V1(master_get_table_ddl_events);
PG_FUNCTION_INFO_V1(master_get_new_shardid);
PG_FUNCTION_INFO_V1(master_get_new_placementid);
PG_FUNCTION_INFO_V1(master_get_active_worker_nodes);
PG_FUNCTION_INFO_V1(citus_get_active_worker_nodes);
PG_FUNCTION_INFO_V1(master_get_round_robin_candidate_nodes);
PG_FUNCTION_INFO_V1(master_stage_shard_row);
PG_FUNCTION_INFO_V1(master_stage_shard_placement_row);

@@ -442,12 +443,12 @@ master_stage_shard_placement_row(PG_FUNCTION_ARGS)


/*
 * master_get_active_worker_nodes returns a set of active worker host names and
 * citus_get_active_worker_nodes returns a set of active worker host names and
 * port numbers in deterministic order. Currently we assume that all worker
 * nodes in pg_dist_node are active.
 */
Datum
master_get_active_worker_nodes(PG_FUNCTION_ARGS)
citus_get_active_worker_nodes(PG_FUNCTION_ARGS)
{
	FuncCallContext *functionContext = NULL;
	uint32 workerNodeIndex = 0;

@@ -512,6 +513,16 @@ master_get_active_worker_nodes(PG_FUNCTION_ARGS)
}


/*
 * master_get_active_worker_nodes is a wrapper function for old UDF name.
 */
Datum
master_get_active_worker_nodes(PG_FUNCTION_ARGS)
{
	return citus_get_active_worker_nodes(fcinfo);
}
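After this rename both spellings resolve to the same implementation; the old master_* UDF simply forwards to the new one, so existing scripts keep working:

```sql
SELECT * FROM citus_get_active_worker_nodes();
-- The legacy name returns the same rows via the wrapper above.
SELECT * FROM master_get_active_worker_nodes();
```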
/* Finds the relationId from a potentially qualified relation name. */
Oid
ResolveRelationId(text *relationName, bool missingOk)
@ -16,6 +16,7 @@
|
|||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/shard_cleaner.h"
|
||||
#include "distributed/resource_lock.h"
|
||||
#include "distributed/worker_transaction.h"
|
||||
|
||||
|
||||
|
@ -23,7 +24,7 @@
|
|||
PG_FUNCTION_INFO_V1(master_defer_delete_shards);
|
||||
|
||||
|
||||
static int DropMarkedShards(void);
|
||||
static int DropMarkedShards(bool waitForCleanupLock);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -44,7 +45,8 @@ master_defer_delete_shards(PG_FUNCTION_ARGS)
|
|||
CheckCitusVersion(ERROR);
|
||||
EnsureCoordinator();
|
||||
|
||||
int droppedShardCount = DropMarkedShards();
|
||||
bool waitForCleanupLock = true;
|
||||
int droppedShardCount = DropMarkedShards(waitForCleanupLock);
|
||||
|
||||
PG_RETURN_INT32(droppedShardCount);
|
||||
}
|
||||
|
@ -55,14 +57,14 @@ master_defer_delete_shards(PG_FUNCTION_ARGS)
|
|||
* any errors to make it safe to use in the maintenance daemon.
|
||||
*/
|
||||
int
|
||||
TryDropMarkedShards(void)
|
||||
TryDropMarkedShards(bool waitForCleanupLock)
|
||||
{
|
||||
int droppedShardCount = 0;
|
||||
MemoryContext savedContext = CurrentMemoryContext;
|
||||
|
||||
PG_TRY();
|
||||
{
|
||||
droppedShardCount = DropMarkedShards();
|
||||
droppedShardCount = DropMarkedShards(waitForCleanupLock);
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
|
@ -88,9 +90,15 @@ TryDropMarkedShards(void)
|
|||
* group and continues with others. The group that has been skipped will be
|
||||
* removed at a later time when there are no locks held anymore on those
|
||||
* placements.
|
||||
*
|
||||
* Before doing any of this it will take an exclusive PlacementCleanup lock.
|
||||
* This is to ensure that this function is not being run concurrently.
|
||||
* Otherwise really bad race conditions are possible, such as removing all
|
||||
* placements of a shard. waitForCleanupLock indicates if this function should
|
||||
* wait for this lock or returns with a warning.
|
||||
*/
|
||||
static int
|
||||
DropMarkedShards(void)
|
||||
DropMarkedShards(bool waitForCleanupLock)
|
||||
{
|
||||
int removedShardCount = 0;
|
||||
ListCell *shardPlacementCell = NULL;
|
||||
|
@ -100,6 +108,16 @@ DropMarkedShards(void)
|
|||
return removedShardCount;
|
||||
}
|
||||
|
||||
if (waitForCleanupLock)
|
||||
{
|
||||
LockPlacementCleanup();
|
||||
}
|
||||
else if (!TryLockPlacementCleanup())
|
||||
{
|
||||
ereport(WARNING, (errmsg("could not acquire lock to cleanup placements")));
|
||||
return 0;
|
||||
}
|
||||
|
||||
List *shardPlacementList = AllShardPlacementsWithShardPlacementState(
|
||||
SHARD_STATE_TO_DELETE);
|
||||
foreach(shardPlacementCell, shardPlacementList)
|
||||
|
|
|
@ -30,7 +30,6 @@
|
|||
#include "distributed/connection_management.h"
|
||||
#include "distributed/enterprise.h"
|
||||
#include "distributed/hash_helpers.h"
|
||||
#include "distributed/intermediate_result_pruning.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
|
@ -647,12 +646,12 @@ SetupRebalanceMonitor(List *placementUpdateList, Oid relationId)
|
|||
List *colocatedUpdateList = GetColocatedRebalanceSteps(placementUpdateList);
|
||||
ListCell *colocatedUpdateCell = NULL;
|
||||
|
||||
ProgressMonitorData *monitor = CreateProgressMonitor(REBALANCE_ACTIVITY_MAGIC_NUMBER,
|
||||
list_length(colocatedUpdateList),
|
||||
sizeof(
|
||||
PlacementUpdateEventProgress),
|
||||
relationId);
|
||||
PlacementUpdateEventProgress *rebalanceSteps = monitor->steps;
|
||||
dsm_handle dsmHandle;
|
||||
ProgressMonitorData *monitor = CreateProgressMonitor(
|
||||
list_length(colocatedUpdateList),
|
||||
sizeof(PlacementUpdateEventProgress),
|
||||
&dsmHandle);
|
||||
PlacementUpdateEventProgress *rebalanceSteps = ProgressMonitorSteps(monitor);
|
||||
|
||||
int32 eventIndex = 0;
|
||||
foreach(colocatedUpdateCell, colocatedUpdateList)
|
||||
|
@ -670,6 +669,7 @@ SetupRebalanceMonitor(List *placementUpdateList, Oid relationId)
|
|||
|
||||
eventIndex++;
|
||||
}
|
||||
RegisterProgressMonitor(REBALANCE_ACTIVITY_MAGIC_NUMBER, relationId, dsmHandle);
|
||||
}
|
||||
|
||||
|
||||
|
@ -814,7 +814,7 @@ citus_drain_node(PG_FUNCTION_ARGS)
|
|||
|
||||
char *nodeName = text_to_cstring(nodeNameText);
|
||||
int connectionFlag = FORCE_NEW_CONNECTION;
|
||||
MultiConnection *connection = GetNodeConnection(connectionFlag, LOCAL_HOST_NAME,
|
||||
MultiConnection *connection = GetNodeConnection(connectionFlag, LocalHostName,
|
||||
PostPortNumber);
|
||||
|
||||
/*
|
||||
|
@ -972,7 +972,6 @@ Datum
|
|||
get_rebalance_progress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
List *segmentList = NIL;
|
||||
ListCell *rebalanceMonitorCell = NULL;
|
||||
TupleDesc tupdesc;
|
||||
Tuplestorestate *tupstore = SetupTuplestore(fcinfo, &tupdesc);
|
||||
|
||||
|
@ -980,11 +979,11 @@ get_rebalance_progress(PG_FUNCTION_ARGS)
|
|||
List *rebalanceMonitorList = ProgressMonitorList(REBALANCE_ACTIVITY_MAGIC_NUMBER,
|
||||
&segmentList);
|
||||
|
||||
foreach(rebalanceMonitorCell, rebalanceMonitorList)
|
||||
ProgressMonitorData *monitor = NULL;
|
||||
foreach_ptr(monitor, rebalanceMonitorList)
|
||||
{
|
||||
ProgressMonitorData *monitor = lfirst(rebalanceMonitorCell);
|
||||
PlacementUpdateEventProgress *placementUpdateEvents = monitor->steps;
|
||||
|
||||
PlacementUpdateEventProgress *placementUpdateEvents = ProgressMonitorSteps(
|
||||
monitor);
|
||||
for (int eventIndex = 0; eventIndex < monitor->stepCount; eventIndex++)
|
||||
{
|
||||
PlacementUpdateEventProgress *step = placementUpdateEvents + eventIndex;
|
||||
|
@ -1201,7 +1200,7 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent,
|
|||
sourceNode->workerPort,
|
||||
REBALANCE_PROGRESS_MOVING);
|
||||
|
||||
MultiConnection *connection = GetNodeConnection(connectionFlag, LOCAL_HOST_NAME,
|
||||
MultiConnection *connection = GetNodeConnection(connectionFlag, LocalHostName,
|
||||
PostPortNumber);
|
||||
|
||||
/*
|
||||
|
@ -2141,9 +2140,9 @@ UpdateColocatedShardPlacementProgress(uint64 shardId, char *sourceName, int sour
|
|||
{
|
||||
ProgressMonitorData *header = GetCurrentProgressMonitor();
|
||||
|
||||
if (header != NULL && header->steps != NULL)
|
||||
if (header != NULL)
|
||||
{
|
||||
PlacementUpdateEventProgress *steps = header->steps;
|
||||
PlacementUpdateEventProgress *steps = ProgressMonitorSteps(header);
|
||||
ListCell *colocatedShardIntervalCell = NULL;
|
||||
|
||||
ShardInterval *shardInterval = LoadShardInterval(shardId);
|
||||
|
|
|
@ -32,7 +32,9 @@
|
|||
#include "distributed/connection_management.h"
|
||||
#include "distributed/deparse_shard_query.h"
|
||||
#include "distributed/distributed_planner.h"
|
||||
#include "distributed/foreign_key_relationship.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/lock_graph.h"
|
||||
#include "distributed/multi_client_executor.h"
|
||||
#include "distributed/multi_executor.h"
|
||||
#include "distributed/metadata_utility.h"
|
||||
|
@ -65,12 +67,22 @@ static List * RelationShardListForShardCreate(ShardInterval *shardInterval);
|
|||
static bool WorkerShardStats(ShardPlacement *placement, Oid relationId,
|
||||
const char *shardName, uint64 *shardSize,
|
||||
text **shardMinValue, text **shardMaxValue);
|
||||
static void UpdateTableStatistics(Oid relationId);
|
||||
static void ReceiveAndUpdateShardsSizeAndMinMax(List *connectionList);
|
||||
static void UpdateShardSizeAndMinMax(uint64 shardId, ShardInterval *shardInterval, Oid
|
||||
relationId, List *shardPlacementList, uint64
|
||||
shardSize, text *shardMinValue,
|
||||
text *shardMaxValue);
|
||||
static bool ProcessShardStatisticsRow(PGresult *result, int64 rowIndex, uint64 *shardId,
|
||||
text **shardMinValue, text **shardMaxValue,
|
||||
uint64 *shardSize);
|
||||
|
||||
/* exports for SQL callable functions */
|
||||
PG_FUNCTION_INFO_V1(master_create_empty_shard);
|
||||
PG_FUNCTION_INFO_V1(master_append_table_to_shard);
|
||||
PG_FUNCTION_INFO_V1(citus_update_shard_statistics);
|
||||
PG_FUNCTION_INFO_V1(master_update_shard_statistics);
|
||||
PG_FUNCTION_INFO_V1(citus_update_table_statistics);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -361,6 +373,23 @@ citus_update_shard_statistics(PG_FUNCTION_ARGS)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* citus_update_table_statistics updates metadata (shard size and shard min/max
|
||||
* values) of the shards of the given table
|
||||
*/
|
||||
Datum
|
||||
citus_update_table_statistics(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid distributedTableId = PG_GETARG_OID(0);
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
UpdateTableStatistics(distributedTableId);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
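If the refresh needs to be driven from C rather than SQL, the fmgr interface is enough; a minimal sketch, assuming it sits in the same translation unit as the PG_FUNCTION_INFO_V1 declaration above (the helper name is illustrative, not part of Citus):

#include "postgres.h"
#include "fmgr.h"

/* illustrative helper, not part of Citus: refresh statistics for one table */
static void
RefreshTableShardStatistics(Oid relationId)
{
	/* calls the UDF defined above through the fmgr convenience wrapper */
	DirectFunctionCall1(citus_update_table_statistics,
						ObjectIdGetDatum(relationId));
}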
|
||||
|
||||
|
||||
/*
|
||||
* master_update_shard_statistics is a wrapper function for old UDF name.
|
||||
*/
|
||||
|
@ -782,7 +811,6 @@ UpdateShardStatistics(int64 shardId)
|
|||
{
|
||||
ShardInterval *shardInterval = LoadShardInterval(shardId);
|
||||
Oid relationId = shardInterval->relationId;
|
||||
char storageType = shardInterval->storageType;
|
||||
bool statsOK = false;
|
||||
uint64 shardSize = 0;
|
||||
text *minValue = NULL;
|
||||
|
@ -825,17 +853,166 @@ UpdateShardStatistics(int64 shardId)
|
|||
errdetail("Setting shard statistics to NULL")));
|
||||
}
|
||||
|
||||
/* make sure we don't process cancel signals */
|
||||
HOLD_INTERRUPTS();
|
||||
UpdateShardSizeAndMinMax(shardId, shardInterval, relationId, shardPlacementList,
|
||||
shardSize, minValue, maxValue);
|
||||
return shardSize;
|
||||
}
|
||||
|
||||
/* update metadata for each shard placement we appended to */
|
||||
|
||||
/*
|
||||
* UpdateTableStatistics updates metadata (shard size and shard min/max values)
|
||||
* of the shards of the given table. Follows a similar logic to citus_shard_sizes function.
|
||||
*/
|
||||
static void
|
||||
UpdateTableStatistics(Oid relationId)
|
||||
{
|
||||
List *citusTableIds = NIL;
|
||||
citusTableIds = lappend_oid(citusTableIds, relationId);
|
||||
|
||||
/* we want to use a distributed transaction here to detect distributed deadlocks */
|
||||
bool useDistributedTransaction = true;
|
||||
|
||||
/* we also want shard min/max values for append distributed tables */
|
||||
bool useShardMinMaxQuery = true;
|
||||
|
||||
List *connectionList = SendShardStatisticsQueriesInParallel(citusTableIds,
|
||||
useDistributedTransaction,
|
||||
useShardMinMaxQuery);
|
||||
|
||||
ReceiveAndUpdateShardsSizeAndMinMax(connectionList);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReceiveAndUpdateShardsSizeAndMinMax receives shard id, size
|
||||
* and min max results from the given connection list, and updates
|
||||
* respective entries in pg_dist_placement and pg_dist_shard
|
||||
*/
|
||||
static void
|
||||
ReceiveAndUpdateShardsSizeAndMinMax(List *connectionList)
|
||||
{
|
||||
/*
|
||||
* From the connection list, we will not get all the shards, but
|
||||
* all the placements. We use a hash table to remember already visited shard ids
|
||||
* since we update all the different placements of a shard id at once.
|
||||
*/
|
||||
HTAB *alreadyVisitedShardPlacements = CreateOidVisitedHashSet();
|
||||
|
||||
MultiConnection *connection = NULL;
|
||||
foreach_ptr(connection, connectionList)
|
||||
{
|
||||
if (PQstatus(connection->pgConn) != CONNECTION_OK)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
bool raiseInterrupts = true;
|
||||
PGresult *result = GetRemoteCommandResult(connection, raiseInterrupts);
|
||||
if (!IsResponseOK(result))
|
||||
{
|
||||
ReportResultError(connection, result, WARNING);
|
||||
continue;
|
||||
}
|
||||
|
||||
int64 rowCount = PQntuples(result);
|
||||
int64 colCount = PQnfields(result);
|
||||
|
||||
/* not expected, but guard against a malformed result anyway */
|
||||
if (colCount != UPDATE_SHARD_STATISTICS_COLUMN_COUNT)
|
||||
{
|
||||
ereport(WARNING, (errmsg("unexpected number of columns from "
|
||||
"citus_update_table_statistics")));
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int64 rowIndex = 0; rowIndex < rowCount; rowIndex++)
|
||||
{
|
||||
uint64 shardId = 0;
|
||||
text *shardMinValue = NULL;
|
||||
text *shardMaxValue = NULL;
|
||||
uint64 shardSize = 0;
|
||||
|
||||
if (!ProcessShardStatisticsRow(result, rowIndex, &shardId, &shardMinValue,
|
||||
&shardMaxValue, &shardSize))
|
||||
{
|
||||
/* this row has no valid shard statistics */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (OidVisited(alreadyVisitedShardPlacements, shardId))
|
||||
{
|
||||
/* We have already updated this placement list */
|
||||
continue;
|
||||
}
|
||||
|
||||
VisitOid(alreadyVisitedShardPlacements, shardId);
|
||||
|
||||
ShardInterval *shardInterval = LoadShardInterval(shardId);
|
||||
Oid relationId = shardInterval->relationId;
|
||||
List *shardPlacementList = ActiveShardPlacementList(shardId);
|
||||
|
||||
UpdateShardSizeAndMinMax(shardId, shardInterval, relationId,
|
||||
shardPlacementList, shardSize, shardMinValue,
|
||||
shardMaxValue);
|
||||
}
|
||||
PQclear(result);
|
||||
ForgetResults(connection);
|
||||
}
|
||||
hash_destroy(alreadyVisitedShardPlacements);
|
||||
}
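The visited-shard bookkeeping above goes through Citus list utilities; the same pattern can be expressed directly with PostgreSQL's dynahash API. A minimal sketch under that assumption — the helper names here are illustrative, not the Citus ones:

#include "postgres.h"
#include "utils/hsearch.h"

/* illustrative: a set of already-processed ids, keyed on the Oid itself */
static HTAB *
CreateVisitedOidSet(void)
{
	HASHCTL info;

	memset(&info, 0, sizeof(info));
	info.keysize = sizeof(Oid);
	info.entrysize = sizeof(Oid);	/* key-only entries */
	info.hcxt = CurrentMemoryContext;

	return hash_create("visited oid set", 32, &info,
					   HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
}

/* records the oid and returns true if it had been seen before */
static bool
MarkOidVisited(HTAB *visitedSet, Oid oid)
{
	bool found = false;

	hash_search(visitedSet, &oid, HASH_ENTER, &found);
	return found;
}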
|
||||
|
||||
|
||||
/*
|
||||
* ProcessShardStatisticsRow processes a row of shard statistics of the input PGresult
|
||||
* - it returns true if this row belongs to a valid shard
|
||||
* - it returns false if this row has no valid shard statistics (shardId = INVALID_SHARD_ID)
|
||||
*/
|
||||
static bool
|
||||
ProcessShardStatisticsRow(PGresult *result, int64 rowIndex, uint64 *shardId,
|
||||
text **shardMinValue, text **shardMaxValue, uint64 *shardSize)
|
||||
{
|
||||
*shardId = ParseIntField(result, rowIndex, 0);
|
||||
|
||||
/* check for the dummy entries we put so that UNION ALL wouldn't complain */
|
||||
if (*shardId == INVALID_SHARD_ID)
|
||||
{
|
||||
/* this row has no valid shard statistics */
|
||||
return false;
|
||||
}
|
||||
|
||||
char *minValueResult = PQgetvalue(result, rowIndex, 1);
|
||||
char *maxValueResult = PQgetvalue(result, rowIndex, 2);
|
||||
*shardMinValue = cstring_to_text(minValueResult);
|
||||
*shardMaxValue = cstring_to_text(maxValueResult);
|
||||
*shardSize = ParseIntField(result, rowIndex, 3);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* UpdateShardSizeAndMinMax updates the shardlength (shard size) of the given
|
||||
* shard and its placements in pg_dist_placement, and updates the shard min value
|
||||
* and shard max value of the given shard in pg_dist_shard if the relationId belongs
|
||||
* to an append-distributed table
|
||||
*/
|
||||
static void
|
||||
UpdateShardSizeAndMinMax(uint64 shardId, ShardInterval *shardInterval, Oid relationId,
|
||||
List *shardPlacementList, uint64 shardSize, text *shardMinValue,
|
||||
text *shardMaxValue)
|
||||
{
|
||||
char storageType = shardInterval->storageType;
|
||||
|
||||
ShardPlacement *placement = NULL;
|
||||
|
||||
/* update metadata for each shard placement */
|
||||
foreach_ptr(placement, shardPlacementList)
|
||||
{
|
||||
uint64 placementId = placement->placementId;
|
||||
int32 groupId = placement->groupId;
|
||||
|
||||
DeleteShardPlacementRow(placementId);
|
||||
InsertShardPlacementRow(shardId, placementId, SHARD_STATE_ACTIVE, shardSize,
|
||||
InsertShardPlacementRow(shardId, placementId, SHARD_STATE_ACTIVE,
|
||||
shardSize,
|
||||
groupId);
|
||||
}
|
||||
|
||||
|
@ -843,18 +1020,9 @@ UpdateShardStatistics(int64 shardId)
|
|||
if (IsCitusTableType(relationId, APPEND_DISTRIBUTED))
|
||||
{
|
||||
DeleteShardRow(shardId);
|
||||
InsertShardRow(relationId, shardId, storageType, minValue, maxValue);
|
||||
InsertShardRow(relationId, shardId, storageType, shardMinValue,
|
||||
shardMaxValue);
|
||||
}
|
||||
|
||||
if (QueryCancelPending)
|
||||
{
|
||||
ereport(WARNING, (errmsg("cancel requests are ignored during metadata update")));
|
||||
QueryCancelPending = false;
|
||||
}
|
||||
|
||||
RESUME_INTERRUPTS();
|
||||
|
||||
return shardSize;
|
||||
}
@ -38,10 +38,8 @@
|
|||
#include "utils/rel.h"
|
||||
#include "utils/syscache.h"
|
||||
|
||||
static void UpdateTaskQueryString(Query *query, Oid distributedTableId,
|
||||
RangeTblEntry *valuesRTE, Task *task);
|
||||
static bool ReplaceRelationConstraintByShardConstraint(List *relationShardList,
|
||||
OnConflictExpr *onConflict);
|
||||
|
||||
static void UpdateTaskQueryString(Query *query, Task *task);
|
||||
static RelationShard * FindRelationShard(Oid inputRelationId, List *relationShardList);
|
||||
static void ConvertRteToSubqueryWithEmptyResult(RangeTblEntry *rte);
|
||||
static bool ShouldLazyDeparseQuery(Task *task);
|
||||
|
@ -57,27 +55,43 @@ RebuildQueryStrings(Job *workerJob)
|
|||
{
|
||||
Query *originalQuery = workerJob->jobQuery;
|
||||
List *taskList = workerJob->taskList;
|
||||
Oid relationId = ((RangeTblEntry *) linitial(originalQuery->rtable))->relid;
|
||||
RangeTblEntry *valuesRTE = ExtractDistributedInsertValuesRTE(originalQuery);
|
||||
|
||||
Task *task = NULL;
|
||||
|
||||
if (originalQuery->commandType == CMD_INSERT)
|
||||
{
|
||||
AddInsertAliasIfNeeded(originalQuery);
|
||||
}
|
||||
|
||||
foreach_ptr(task, taskList)
|
||||
{
|
||||
Query *query = originalQuery;
|
||||
|
||||
if (UpdateOrDeleteQuery(query) && list_length(taskList) > 1)
|
||||
/*
|
||||
* Copy the query if there are multiple tasks. If there is a single
|
||||
* task, we scribble on the original query to avoid the copying
|
||||
* overhead.
|
||||
*/
|
||||
if (list_length(taskList) > 1)
|
||||
{
|
||||
query = copyObject(originalQuery);
|
||||
}
|
||||
|
||||
if (UpdateOrDeleteQuery(query))
|
||||
{
|
||||
/*
|
||||
* For UPDATE and DELETE queries, we may have subqueries and joins, so
|
||||
* we use relation shard list to update shard names and call
|
||||
* pg_get_query_def() directly.
|
||||
*/
|
||||
List *relationShardList = task->relationShardList;
|
||||
UpdateRelationToShardNames((Node *) query, relationShardList);
|
||||
}
|
||||
else if (query->commandType == CMD_INSERT && task->modifyWithSubquery)
|
||||
{
|
||||
/* for INSERT..SELECT, adjust shard names in SELECT part */
|
||||
List *relationShardList = task->relationShardList;
|
||||
ShardInterval *shardInterval = LoadShardInterval(task->anchorShardId);
|
||||
|
||||
query = copyObject(originalQuery);
|
||||
|
||||
RangeTblEntry *copiedInsertRte = ExtractResultRelationRTEOrError(query);
|
||||
RangeTblEntry *copiedSubqueryRte = ExtractSelectRangeTableEntry(query);
|
||||
Query *copiedSubquery = copiedSubqueryRte->subquery;
|
||||
|
@ -90,29 +104,18 @@ RebuildQueryStrings(Job *workerJob)
|
|||
|
||||
ReorderInsertSelectTargetLists(query, copiedInsertRte, copiedSubqueryRte);
|
||||
|
||||
/* setting an alias simplifies deparsing of RETURNING */
|
||||
if (copiedInsertRte->alias == NULL)
|
||||
{
|
||||
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
|
||||
copiedInsertRte->alias = alias;
|
||||
}
|
||||
|
||||
UpdateRelationToShardNames((Node *) copiedSubquery, relationShardList);
|
||||
}
|
||||
else if (query->commandType == CMD_INSERT && (query->onConflict != NULL ||
|
||||
valuesRTE != NULL))
|
||||
|
||||
if (query->commandType == CMD_INSERT)
|
||||
{
|
||||
RangeTblEntry *modifiedRelationRTE = linitial(originalQuery->rtable);
|
||||
|
||||
/*
|
||||
* Always add an alias in UPSERTs and multi-row INSERTs to avoid
|
||||
* deparsing issues (e.g. RETURNING might reference the original
|
||||
* table name, which has been replaced by a shard name).
|
||||
* We store the modified relation ID in the task so we can lazily call
|
||||
* deparse_shard_query when the string is needed
|
||||
*/
|
||||
RangeTblEntry *rangeTableEntry = linitial(query->rtable);
|
||||
if (rangeTableEntry->alias == NULL)
|
||||
{
|
||||
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
|
||||
rangeTableEntry->alias = alias;
|
||||
}
|
||||
task->anchorDistributedTableId = modifiedRelationRTE->relid;
|
||||
}
|
||||
|
||||
bool isQueryObjectOrText = GetTaskQueryType(task) == TASK_QUERY_TEXT ||
|
||||
|
@ -122,7 +125,7 @@ RebuildQueryStrings(Job *workerJob)
|
|||
? "(null)"
|
||||
: ApplyLogRedaction(TaskQueryString(task)))));
|
||||
|
||||
UpdateTaskQueryString(query, relationId, valuesRTE, task);
|
||||
UpdateTaskQueryString(query, task);
|
||||
|
||||
/*
|
||||
* If parameters were resolved in the job query, then they are now also
|
||||
|
@ -136,54 +139,69 @@ RebuildQueryStrings(Job *workerJob)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* AddInsertAliasIfNeeded adds an alias in UPSERTs and multi-row INSERTs to avoid
|
||||
* deparsing issues (e.g. RETURNING might reference the original table name,
|
||||
* which has been replaced by a shard name).
|
||||
*/
|
||||
void
|
||||
AddInsertAliasIfNeeded(Query *query)
|
||||
{
|
||||
Assert(query->commandType == CMD_INSERT);
|
||||
|
||||
if (query->onConflict == NULL &&
|
||||
ExtractDistributedInsertValuesRTE(query) == NULL)
|
||||
{
|
||||
/* simple single-row insert does not need an alias */
|
||||
return;
|
||||
}
|
||||
|
||||
RangeTblEntry *rangeTableEntry = linitial(query->rtable);
|
||||
if (rangeTableEntry->alias != NULL)
|
||||
{
|
||||
/* INSERT already has an alias */
|
||||
return;
|
||||
}
|
||||
|
||||
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
|
||||
rangeTableEntry->alias = alias;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* UpdateTaskQueryString updates the query string stored within the provided
|
||||
* Task. If the Task has row values from a multi-row INSERT, those are injected
|
||||
* into the provided query (using the provided valuesRTE, which must belong to
|
||||
* the query) before deparse occurs (the query's full VALUES list will be
|
||||
* restored before this function returns).
|
||||
* into the provided query before deparse occurs (the query's full VALUES list
|
||||
* will be restored before this function returns).
|
||||
*/
|
||||
static void
|
||||
UpdateTaskQueryString(Query *query, Oid distributedTableId, RangeTblEntry *valuesRTE,
|
||||
Task *task)
|
||||
UpdateTaskQueryString(Query *query, Task *task)
|
||||
{
|
||||
List *oldValuesLists = NIL;
|
||||
|
||||
if (valuesRTE != NULL)
|
||||
{
|
||||
Assert(valuesRTE->rtekind == RTE_VALUES);
|
||||
Assert(task->rowValuesLists != NULL);
|
||||
|
||||
oldValuesLists = valuesRTE->values_lists;
|
||||
valuesRTE->values_lists = task->rowValuesLists;
|
||||
}
|
||||
|
||||
if (query->commandType != CMD_INSERT)
|
||||
{
|
||||
/*
|
||||
* For UPDATE and DELETE queries, we may have subqueries and joins, so
|
||||
* we use relation shard list to update shard names and call
|
||||
* pg_get_query_def() directly.
|
||||
*/
|
||||
List *relationShardList = task->relationShardList;
|
||||
UpdateRelationToShardNames((Node *) query, relationShardList);
|
||||
}
|
||||
else if (ShouldLazyDeparseQuery(task))
|
||||
{
|
||||
/*
|
||||
* not all insert queries are copied before calling this
|
||||
* function, so we do it here
|
||||
*/
|
||||
query = copyObject(query);
|
||||
}
|
||||
RangeTblEntry *valuesRTE = NULL;
|
||||
|
||||
if (query->commandType == CMD_INSERT)
|
||||
{
|
||||
/*
|
||||
* We store this in the task so we can lazily call
|
||||
* deparse_shard_query when the string is needed
|
||||
*/
|
||||
task->anchorDistributedTableId = distributedTableId;
|
||||
/* extract the VALUES from the INSERT */
|
||||
valuesRTE = ExtractDistributedInsertValuesRTE(query);
|
||||
|
||||
if (valuesRTE != NULL)
|
||||
{
|
||||
Assert(valuesRTE->rtekind == RTE_VALUES);
|
||||
Assert(task->rowValuesLists != NULL);
|
||||
|
||||
oldValuesLists = valuesRTE->values_lists;
|
||||
valuesRTE->values_lists = task->rowValuesLists;
|
||||
}
|
||||
|
||||
if (ShouldLazyDeparseQuery(task))
|
||||
{
|
||||
/*
|
||||
* not all insert queries are copied before calling this
|
||||
* function, so we do it here
|
||||
*/
|
||||
query = copyObject(query);
|
||||
}
|
||||
}
|
||||
|
||||
SetTaskQueryIfShouldLazyDeparse(task, query);
|
||||
|
@ -266,124 +284,6 @@ UpdateRelationToShardNames(Node *node, List *relationShardList)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* UpdateRelationsToLocalShardTables walks over the query tree and appends shard ids to
|
||||
* relations. The caller is responsible for ensuring that the resulting Query can
|
||||
* be executed locally.
|
||||
*/
|
||||
bool
|
||||
UpdateRelationsToLocalShardTables(Node *node, List *relationShardList)
|
||||
{
|
||||
if (node == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* want to look at all RTEs, even in subqueries, CTEs and such */
|
||||
if (IsA(node, Query))
|
||||
{
|
||||
return query_tree_walker((Query *) node, UpdateRelationsToLocalShardTables,
|
||||
relationShardList, QTW_EXAMINE_RTES_BEFORE);
|
||||
}
|
||||
|
||||
if (IsA(node, OnConflictExpr))
|
||||
{
|
||||
OnConflictExpr *onConflict = (OnConflictExpr *) node;
|
||||
|
||||
return ReplaceRelationConstraintByShardConstraint(relationShardList, onConflict);
|
||||
}
|
||||
|
||||
if (!IsA(node, RangeTblEntry))
|
||||
{
|
||||
return expression_tree_walker(node, UpdateRelationsToLocalShardTables,
|
||||
relationShardList);
|
||||
}
|
||||
|
||||
RangeTblEntry *newRte = (RangeTblEntry *) node;
|
||||
|
||||
if (newRte->rtekind != RTE_RELATION)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
RelationShard *relationShard = FindRelationShard(newRte->relid,
|
||||
relationShardList);
|
||||
|
||||
/* the function should only be called with local shards */
|
||||
if (relationShard == NULL)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
Oid shardOid = GetTableLocalShardOid(relationShard->relationId,
|
||||
relationShard->shardId);
|
||||
|
||||
newRte->relid = shardOid;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReplaceRelationConstraintByShardConstraint replaces given OnConflictExpr's
|
||||
* constraint id with constraint id of the corresponding shard.
|
||||
*/
|
||||
static bool
|
||||
ReplaceRelationConstraintByShardConstraint(List *relationShardList,
|
||||
OnConflictExpr *onConflict)
|
||||
{
|
||||
Oid constraintId = onConflict->constraint;
|
||||
|
||||
if (!OidIsValid(constraintId))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
Oid constraintRelationId = InvalidOid;
|
||||
|
||||
HeapTuple heapTuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintId));
|
||||
if (HeapTupleIsValid(heapTuple))
|
||||
{
|
||||
Form_pg_constraint contup = (Form_pg_constraint) GETSTRUCT(heapTuple);
|
||||
|
||||
constraintRelationId = contup->conrelid;
|
||||
ReleaseSysCache(heapTuple);
|
||||
}
|
||||
|
||||
/*
|
||||
* We can return here without calling the walker function, since we know there
|
||||
* will be no possible tables or constraints after this point, by the syntax.
|
||||
*/
|
||||
if (!OidIsValid(constraintRelationId))
|
||||
{
|
||||
ereport(ERROR, (errmsg("Invalid relation id (%u) for constraint: %s",
|
||||
constraintRelationId, get_constraint_name(constraintId))));
|
||||
}
|
||||
|
||||
RelationShard *relationShard = FindRelationShard(constraintRelationId,
|
||||
relationShardList);
|
||||
|
||||
if (relationShard != NULL)
|
||||
{
|
||||
char *constraintName = get_constraint_name(constraintId);
|
||||
|
||||
AppendShardIdToName(&constraintName, relationShard->shardId);
|
||||
|
||||
Oid shardOid = GetTableLocalShardOid(relationShard->relationId,
|
||||
relationShard->shardId);
|
||||
|
||||
Oid shardConstraintId = get_relation_constraint_oid(shardOid, constraintName,
|
||||
false);
|
||||
|
||||
onConflict->constraint = shardConstraintId;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* FindRelationShard finds the RelationShard for shard relation with
|
||||
* the given Oid, if it exists in the given relationShardList. Otherwise, returns NULL.
@ -49,6 +49,7 @@
|
|||
#include "executor/executor.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "nodes/pg_list.h"
|
||||
#include "parser/parsetree.h"
|
||||
#include "parser/parse_type.h"
|
||||
#if PG_VERSION_NUM >= PG_VERSION_12
|
||||
|
@ -98,6 +99,7 @@ static PlannedStmt * FinalizeNonRouterPlan(PlannedStmt *localPlan,
|
|||
DistributedPlan *distributedPlan,
|
||||
CustomScan *customScan);
|
||||
static PlannedStmt * FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan);
|
||||
static AppendRelInfo * FindTargetAppendRelInfo(PlannerInfo *root, int relationRteIndex);
|
||||
static List * makeTargetListFromCustomScanList(List *custom_scan_tlist);
|
||||
static List * makeCustomScanTargetlistFromExistingTargetList(List *existingTargetlist);
|
||||
static int32 BlessRecordExpressionList(List *exprs);
|
||||
|
@ -124,6 +126,7 @@ static PlannedStmt * PlanFastPathDistributedStmt(DistributedPlanningContext *pla
|
|||
static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext,
|
||||
int rteIdCounter);
|
||||
static RTEListProperties * GetRTEListProperties(List *rangeTableList);
|
||||
static List * TranslatedVars(PlannerInfo *root, int relationIndex);
|
||||
|
||||
|
||||
/* Distributed planner hook */
|
||||
|
@ -165,30 +168,29 @@ distributed_planner(Query *parse,
|
|||
.boundParams = boundParams,
|
||||
};
|
||||
|
||||
if (fastPathRouterQuery)
|
||||
{
|
||||
/*
|
||||
* We need to copy the parse tree because the FastPathPlanner modifies
|
||||
* it. In the next branch we do the same for other distributed queries
|
||||
* too, but for those it needs to be done AFTER calling
|
||||
* AssignRTEIdentities.
|
||||
*/
|
||||
planContext.originalQuery = copyObject(parse);
|
||||
}
|
||||
else if (needsDistributedPlanning)
|
||||
if (needsDistributedPlanning)
|
||||
{
|
||||
/*
|
||||
* standard_planner scribbles on its input, but for deparsing we need the
|
||||
* unmodified form. Note that before copying we call
|
||||
* AssignRTEIdentities, which is needed because these identities need
|
||||
* to be present in the copied query too.
|
||||
* unmodified form. Before copying we call AssignRTEIdentities to be able
|
||||
* to match RTEs in the rewritten query tree with those in the original
|
||||
* tree.
|
||||
*/
|
||||
rteIdCounter = AssignRTEIdentities(rangeTableList, rteIdCounter);
|
||||
|
||||
planContext.originalQuery = copyObject(parse);
|
||||
|
||||
bool setPartitionedTablesInherited = false;
|
||||
AdjustPartitioningForDistributedPlanning(rangeTableList,
|
||||
setPartitionedTablesInherited);
|
||||
/*
|
||||
* When there are partitioned tables (not applicable to fast path),
|
||||
* pretend that they are regular tables to avoid unnecessary work
|
||||
* in standard_planner.
|
||||
*/
|
||||
if (!fastPathRouterQuery)
|
||||
{
|
||||
bool setPartitionedTablesInherited = false;
|
||||
AdjustPartitioningForDistributedPlanning(rangeTableList,
|
||||
setPartitionedTablesInherited);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -447,7 +449,7 @@ AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier)
|
|||
{
|
||||
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
||||
|
||||
rangeTableEntry->values_lists = list_make1_int(rteIdentifier);
|
||||
rangeTableEntry->values_lists = list_make2_int(rteIdentifier, rangeTableEntry->inh);
|
||||
}
|
||||
|
||||
|
||||
|
@ -458,12 +460,24 @@ GetRTEIdentity(RangeTblEntry *rte)
|
|||
Assert(rte->rtekind == RTE_RELATION);
|
||||
Assert(rte->values_lists != NIL);
|
||||
Assert(IsA(rte->values_lists, IntList));
|
||||
Assert(list_length(rte->values_lists) == 1);
|
||||
Assert(list_length(rte->values_lists) == 2);
|
||||
|
||||
return linitial_int(rte->values_lists);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetOriginalInh gets the original value of the inheritance flag set by
|
||||
* AssignRTEIdentity. The planner resets this flag in the rewritten query,
|
||||
* but we need it during deparsing.
|
||||
*/
|
||||
bool
|
||||
GetOriginalInh(RangeTblEntry *rte)
|
||||
{
|
||||
return lsecond_int(rte->values_lists);
|
||||
}
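Both accessors exploit the fact that values_lists is unused for plain relation RTEs, so the RTE identity and the original inh flag can be smuggled through standard_planner in it. A condensed, illustrative round-trip (helper names are not part of Citus; the real entry points are AssignRTEIdentity, GetRTEIdentity and GetOriginalInh above):

#include "postgres.h"
#include "nodes/parsenodes.h"
#include "nodes/pg_list.h"

/* stash: pack the identity and the original inh flag into values_lists */
static void
StashRteMetadata(RangeTblEntry *rte, int rteIdentifier)
{
	Assert(rte->rtekind == RTE_RELATION);
	rte->values_lists = list_make2_int(rteIdentifier, rte->inh);
}

/* read back after planning, when the planner may have reset rte->inh */
static void
ReadRteMetadata(RangeTblEntry *rte, int *rteIdentifier, bool *originalInh)
{
	Assert(list_length(rte->values_lists) == 2);
	*rteIdentifier = linitial_int(rte->values_lists);
	*originalInh = (bool) lsecond_int(rte->values_lists);
}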
|
||||
|
||||
|
||||
/*
|
||||
* GetQueryLockMode returns the necessary lock mode to be acquired for the
|
||||
* given query. (See comment written in RangeTblEntry->rellockmode)
|
||||
|
@ -1814,6 +1828,8 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
|
|||
|
||||
/* see comments on GetVarFromAssignedParam() */
|
||||
relationRestriction->outerPlanParamsList = OuterPlanParamsList(root);
|
||||
relationRestriction->translatedVars = TranslatedVars(root,
|
||||
relationRestriction->index);
|
||||
|
||||
RelationRestrictionContext *relationRestrictionContext =
|
||||
plannerRestrictionContext->relationRestrictionContext;
|
||||
|
@ -1837,6 +1853,61 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* TranslatedVars deep copies the translated vars for the given relation index
|
||||
* if there is any append rel list.
|
||||
*/
|
||||
static List *
|
||||
TranslatedVars(PlannerInfo *root, int relationIndex)
|
||||
{
|
||||
List *translatedVars = NIL;
|
||||
|
||||
if (root->append_rel_list != NIL)
|
||||
{
|
||||
AppendRelInfo *targetAppendRelInfo =
|
||||
FindTargetAppendRelInfo(root, relationIndex);
|
||||
if (targetAppendRelInfo != NULL)
|
||||
{
|
||||
/* postgres deletes translated_vars after pg13, hence we deep copy them here */
|
||||
Node *targetNode = NULL;
|
||||
foreach_ptr(targetNode, targetAppendRelInfo->translated_vars)
|
||||
{
|
||||
translatedVars =
|
||||
lappend(translatedVars, copyObject(targetNode));
|
||||
}
|
||||
}
|
||||
}
|
||||
return translatedVars;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* FindTargetAppendRelInfo finds the target append rel info for the given
|
||||
* relation rte index.
|
||||
*/
|
||||
static AppendRelInfo *
|
||||
FindTargetAppendRelInfo(PlannerInfo *root, int relationRteIndex)
|
||||
{
|
||||
AppendRelInfo *appendRelInfo = NULL;
|
||||
|
||||
/* iterate on the queries that are part of UNION ALL subselects */
|
||||
foreach_ptr(appendRelInfo, root->append_rel_list)
|
||||
{
|
||||
/*
|
||||
* We're only interested in the child rel that is equal to the
|
||||
* relation we're investigating. Here we don't need to find the offset
|
||||
* because postgres adds an offset to child_relid and parent_relid after
|
||||
* calling multi_relation_restriction_hook.
|
||||
*/
|
||||
if (appendRelInfo->child_relid == relationRteIndex)
|
||||
{
|
||||
return appendRelInfo;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AdjustReadIntermediateResultCost adjusts the row count and total cost
|
||||
* of a read_intermediate_result call based on the file size.
|
||||
|
@ -2143,6 +2214,33 @@ CreateAndPushPlannerRestrictionContext(void)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* TranslatedVarsForRteIdentity gets an rteIdentity and returns the
|
||||
* translatedVars that belong to the range table relation. If no
|
||||
* translatedVars are found, the function returns NIL.
|
||||
*/
|
||||
List *
|
||||
TranslatedVarsForRteIdentity(int rteIdentity)
|
||||
{
|
||||
PlannerRestrictionContext *currentPlannerRestrictionContext =
|
||||
CurrentPlannerRestrictionContext();
|
||||
|
||||
List *relationRestrictionList =
|
||||
currentPlannerRestrictionContext->relationRestrictionContext->
|
||||
relationRestrictionList;
|
||||
RelationRestriction *relationRestriction = NULL;
|
||||
foreach_ptr(relationRestriction, relationRestrictionList)
|
||||
{
|
||||
if (GetRTEIdentity(relationRestriction->rte) == rteIdentity)
|
||||
{
|
||||
return relationRestriction->translatedVars;
|
||||
}
|
||||
}
|
||||
|
||||
return NIL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CurrentRestrictionContext returns the most recently added
|
||||
* PlannerRestrictionContext from the plannerRestrictionContextList list.
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
#include "distributed/local_plan_cache.h"
|
||||
#include "distributed/deparse_shard_query.h"
|
||||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/insert_select_planner.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/multi_executor.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#if PG_VERSION_NUM >= PG_VERSION_12
|
||||
#include "optimizer/optimizer.h"
|
||||
|
@ -26,13 +28,21 @@
|
|||
#include "optimizer/clauses.h"
|
||||
|
||||
|
||||
static Query * GetLocalShardQueryForCache(Query *jobQuery, Task *task,
|
||||
ParamListInfo paramListInfo);
|
||||
static char * DeparseLocalShardQuery(Query *jobQuery, List *relationShardList,
|
||||
Oid anchorDistributedTableId, int64 anchorShardId);
|
||||
static int ExtractParameterTypesForParamListInfo(ParamListInfo originalParamListInfo,
|
||||
Oid **parameterTypes);
|
||||
|
||||
/*
|
||||
* CacheLocalPlanForShardQuery replaces the relation OIDs in the job query
|
||||
* with shard relation OIDs and then plans the query and caches the result
|
||||
* in the originalDistributedPlan (which may be preserved across executions).
|
||||
*/
|
||||
void
|
||||
CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan)
|
||||
CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan,
|
||||
ParamListInfo paramListInfo)
|
||||
{
|
||||
PlannedStmt *localPlan = GetCachedLocalPlan(task, originalDistributedPlan);
|
||||
if (localPlan != NULL)
|
||||
|
@ -58,14 +68,14 @@ CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan
|
|||
* We prefer to use jobQuery (over task->query) because we don't want any
|
||||
* functions/params to have been evaluated in the cached plan.
|
||||
*/
|
||||
Query *shardQuery = copyObject(originalDistributedPlan->workerJob->jobQuery);
|
||||
Query *jobQuery = copyObject(originalDistributedPlan->workerJob->jobQuery);
|
||||
|
||||
UpdateRelationsToLocalShardTables((Node *) shardQuery, task->relationShardList);
|
||||
Query *localShardQuery = GetLocalShardQueryForCache(jobQuery, task, paramListInfo);
|
||||
|
||||
LOCKMODE lockMode = GetQueryLockMode(shardQuery);
|
||||
LOCKMODE lockMode = GetQueryLockMode(localShardQuery);
|
||||
|
||||
/* fast path queries can only have a single RTE by definition */
|
||||
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) linitial(shardQuery->rtable);
|
||||
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) linitial(localShardQuery->rtable);
|
||||
|
||||
/*
|
||||
* If the shard has been created in this transaction, we wouldn't see the relationId
|
||||
|
@ -73,24 +83,16 @@ CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan
|
|||
*/
|
||||
if (rangeTableEntry->relid == InvalidOid)
|
||||
{
|
||||
pfree(shardQuery);
|
||||
pfree(jobQuery);
|
||||
pfree(localShardQuery);
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
return;
|
||||
}
|
||||
|
||||
if (IsLoggableLevel(DEBUG5))
|
||||
{
|
||||
StringInfo queryString = makeStringInfo();
|
||||
pg_get_query_def(shardQuery, queryString);
|
||||
|
||||
ereport(DEBUG5, (errmsg("caching plan for query: %s",
|
||||
queryString->data)));
|
||||
}
|
||||
|
||||
LockRelationOid(rangeTableEntry->relid, lockMode);
|
||||
|
||||
LocalPlannedStatement *localPlannedStatement = CitusMakeNode(LocalPlannedStatement);
|
||||
localPlan = planner_compat(shardQuery, 0, NULL);
|
||||
localPlan = planner_compat(localShardQuery, 0, NULL);
|
||||
localPlannedStatement->localPlan = localPlan;
|
||||
localPlannedStatement->shardId = task->anchorShardId;
|
||||
localPlannedStatement->localGroupId = GetLocalGroupId();
|
||||
|
@ -103,6 +105,130 @@ CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetLocalShardQueryForCache is a helper function which generates
|
||||
* the local shard query based on the jobQuery. The function should
|
||||
* not be used for generic purposes; it is specialized for local cached
|
||||
* queries.
|
||||
*
|
||||
* It is not guaranteed to have consistent attribute numbers on the shards
|
||||
* and on the shell (e.g., distributed/reference tables) due to DROP COLUMN
|
||||
* commands.
|
||||
*
|
||||
* To avoid any edge cases due to such discrepancies, we first deparse the
|
||||
* jobQuery with the tables replaced to shards, and parse the query string
|
||||
* back. This is normally a very expensive operation; however, we only do it
|
||||
* once per cached local plan, which is acceptable.
|
||||
*/
|
||||
static Query *
|
||||
GetLocalShardQueryForCache(Query *jobQuery, Task *task, ParamListInfo orig_paramListInfo)
|
||||
{
|
||||
char *shardQueryString =
|
||||
DeparseLocalShardQuery(jobQuery, task->relationShardList,
|
||||
task->anchorDistributedTableId,
|
||||
task->anchorShardId);
|
||||
ereport(DEBUG5, (errmsg("Local shard query that is going to be cached: %s",
|
||||
shardQueryString)));
|
||||
|
||||
Oid *parameterTypes = NULL;
|
||||
int numberOfParameters =
|
||||
ExtractParameterTypesForParamListInfo(orig_paramListInfo, ¶meterTypes);
|
||||
|
||||
Query *localShardQuery =
|
||||
ParseQueryString(shardQueryString, parameterTypes, numberOfParameters);
|
||||
|
||||
return localShardQuery;
|
||||
}
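Condensed to its essence, the roundtrip above is: rewrite relation names to shard names, deparse, and parse the SQL back so the new tree is built against the shard relations and their attribute numbering. A simplified sketch using only helpers already visible in this file; it ignores INSERTs and parameters, which the real code paths (GetLocalShardQueryForCache and DeparseLocalShardQuery below) handle:

static Query *
ReparseAgainstShards(Query *jobQuery, List *relationShardList)
{
	StringInfo queryString = makeStringInfo();

	/* replace shell table names with shard names in place */
	UpdateRelationToShardNames((Node *) jobQuery, relationShardList);

	/* deparse against the shards, then parse the string back */
	pg_get_query_def(jobQuery, queryString);
	return ParseQueryString(queryString->data, NULL, 0);
}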
|
||||
|
||||
|
||||
/*
|
||||
* DeparseLocalShardQuery is a helper function to deparse the given jobQuery for the shard(s)
|
||||
* identified by the relationShardList, anchorDistributedTableId and anchorShardId.
|
||||
*
|
||||
* For the details and comparison with TaskQueryString(), see the comments in the function.
|
||||
*/
|
||||
static char *
|
||||
DeparseLocalShardQuery(Query *jobQuery, List *relationShardList, Oid
|
||||
anchorDistributedTableId, int64 anchorShardId)
|
||||
{
|
||||
StringInfo queryString = makeStringInfo();
|
||||
|
||||
/*
|
||||
* We imitate what TaskQueryString() does, but we cannot rely on that function
|
||||
* as the parameters might have been already resolved on the QueryTree in the
|
||||
* task. Instead, we operate on the jobQuery where we are sure that the
|
||||
* coordinator evaluation has not happened.
|
||||
*
|
||||
* Local shard queries are only applicable for local cached query execution.
|
||||
* In the local cached query execution mode, we can use a query structure
|
||||
* (or query string) with unevaluated expressions as we allow function calls
|
||||
* to be evaluated when the query on the shard is executed (e.g., do not have
|
||||
* coordinator evaluation, instead let Postgres executor evaluate values).
|
||||
*
|
||||
* Additionally, we can allow them to be evaluated again because they are stable,
|
||||
* and we do not cache plans / use unevaluated query strings for queries containing
|
||||
* volatile functions.
|
||||
*/
|
||||
if (jobQuery->commandType == CMD_INSERT)
|
||||
{
|
||||
/*
|
||||
* We currently do not support INSERT .. SELECT here. To support INSERT..SELECT
|
||||
* queries, we should update the relation names to shard names in the SELECT
|
||||
* clause (e.g., UpdateRelationToShardNames()).
|
||||
*/
|
||||
Assert(!CheckInsertSelectQuery(jobQuery));
|
||||
|
||||
AddInsertAliasIfNeeded(jobQuery);
|
||||
|
||||
/*
|
||||
* For INSERT queries we cannot use pg_get_query_def. Mainly because we
|
||||
* cannot run UpdateRelationToShardNames on an INSERT query. This is
|
||||
* because the PG deparsing logic fails when trying to insert into a
|
||||
* RTE_FUNCTION (which is what will happen if you call
|
||||
* UpdateRelationToShardNames).
|
||||
*/
|
||||
deparse_shard_query(jobQuery, anchorDistributedTableId, anchorShardId,
|
||||
queryString);
|
||||
}
|
||||
else
|
||||
{
|
||||
UpdateRelationToShardNames((Node *) jobQuery, relationShardList);
|
||||
|
||||
pg_get_query_def(jobQuery, queryString);
|
||||
}
|
||||
|
||||
return queryString->data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ExtractParameterTypesForParamListInfo is a helper function which helps to
|
||||
* extract the parameter types of the given ParamListInfo via the second
|
||||
* parameter of the function.
|
||||
*
|
||||
* The function also returns the number of parameters. If no parameter exists,
|
||||
* the function returns 0.
|
||||
*/
|
||||
static int
|
||||
ExtractParameterTypesForParamListInfo(ParamListInfo originalParamListInfo,
|
||||
Oid **parameterTypes)
|
||||
{
|
||||
*parameterTypes = NULL;
|
||||
|
||||
int numberOfParameters = 0;
|
||||
if (originalParamListInfo != NULL)
|
||||
{
|
||||
const char **parameterValues = NULL;
|
||||
ParamListInfo paramListInfo = copyParamList(originalParamListInfo);
|
||||
ExtractParametersForLocalExecution(paramListInfo, parameterTypes,
|
||||
¶meterValues);
|
||||
numberOfParameters = paramListInfo->numParams;
|
||||
}
|
||||
|
||||
return numberOfParameters;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetCachedLocalPlan is a helper function which returns the cached
|
||||
* plan in the distributedPlan for the given task, if one exists.
@ -322,10 +322,6 @@ static Node * WorkerLimitCount(Node *limitCount, Node *limitOffset, OrderByLimit
|
|||
static List * WorkerSortClauseList(Node *limitCount,
|
||||
List *groupClauseList, List *sortClauseList,
|
||||
OrderByLimitReference orderByLimitReference);
|
||||
static List * GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
|
||||
List *sortClauseList,
|
||||
AttrNumber *targetProjectionNumber,
|
||||
Index *nextSortGroupRefIndex);
|
||||
static bool CanPushDownLimitApproximate(List *sortClauseList, List *targetList);
|
||||
static bool HasOrderByAggregate(List *sortClauseList, List *targetList);
|
||||
static bool HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList);
|
||||
|
@ -1624,7 +1620,19 @@ MasterAggregateExpression(Aggref *originalAggregate,
|
|||
Expr *directarg;
|
||||
foreach_ptr(directarg, originalAggregate->aggdirectargs)
|
||||
{
|
||||
if (!IsA(directarg, Const) && !IsA(directarg, Param))
|
||||
/*
|
||||
* Need to replace nodes that contain any Vars with Vars referring
|
||||
* to the related column of the result set returned for the worker
|
||||
* aggregation.
|
||||
*
|
||||
* When there are no Vars, then the expression can be fully evaluated
|
||||
* on the coordinator, so we skip it here. This is not just an
|
||||
* optimization, but the result of the expression might require
|
||||
* calling the final function of the aggregate, and doing so when
|
||||
* there are no input rows (i.e.: with an empty tuple slot) is not
|
||||
* desirable for the node-executor methods.
|
||||
*/
|
||||
if (pull_var_clause_default((Node *) directarg) != NIL)
|
||||
{
|
||||
Var *var = makeVar(masterTableId, walkerContext->columnId,
|
||||
exprType((Node *) directarg),
|
||||
|
@ -2705,38 +2713,6 @@ ProcessWindowFunctionsForWorkerQuery(List *windowClauseList,
|
|||
return;
|
||||
}
|
||||
|
||||
WindowClause *windowClause = NULL;
|
||||
foreach_ptr(windowClause, windowClauseList)
|
||||
{
|
||||
List *partitionClauseTargetList =
|
||||
GenerateNewTargetEntriesForSortClauses(originalTargetEntryList,
|
||||
windowClause->partitionClause,
|
||||
&(queryTargetList->
|
||||
targetProjectionNumber),
|
||||
queryWindowClause->
|
||||
nextSortGroupRefIndex);
|
||||
List *orderClauseTargetList =
|
||||
GenerateNewTargetEntriesForSortClauses(originalTargetEntryList,
|
||||
windowClause->orderClause,
|
||||
&(queryTargetList->
|
||||
targetProjectionNumber),
|
||||
queryWindowClause->
|
||||
nextSortGroupRefIndex);
|
||||
|
||||
/*
|
||||
* Note that even if Citus pushes down the window clauses as-is, we may still need to
|
||||
* add the generated entries to the target list. The reason is that the same aggregates
|
||||
* might be referred from another target entry that is a bare aggregate (e.g., no window
|
||||
* functions), which would have been mutated. For instance, when an average aggregate
|
||||
* is mutated on the target list, the window function would refer to a sum aggregate,
|
||||
* which is obviously wrong.
|
||||
*/
|
||||
queryTargetList->targetEntryList = list_concat(queryTargetList->targetEntryList,
|
||||
partitionClauseTargetList);
|
||||
queryTargetList->targetEntryList = list_concat(queryTargetList->targetEntryList,
|
||||
orderClauseTargetList);
|
||||
}
|
||||
|
||||
queryWindowClause->workerWindowClauseList = windowClauseList;
|
||||
queryWindowClause->hasWindowFunctions = true;
|
||||
}
|
||||
|
@ -2802,19 +2778,6 @@ ProcessLimitOrderByForWorkerQuery(OrderByLimitReference orderByLimitReference,
|
|||
groupClauseList,
|
||||
sortClauseList,
|
||||
orderByLimitReference);
|
||||
|
||||
/*
|
||||
* TODO: Do we really need to add the target entries if we're not pushing
|
||||
* down ORDER BY?
|
||||
*/
|
||||
List *newTargetEntryListForSortClauses =
|
||||
GenerateNewTargetEntriesForSortClauses(originalTargetList,
|
||||
queryOrderByLimit->workerSortClauseList,
|
||||
&(queryTargetList->targetProjectionNumber),
|
||||
queryOrderByLimit->nextSortGroupRefIndex);
|
||||
|
||||
queryTargetList->targetEntryList =
|
||||
list_concat(queryTargetList->targetEntryList, newTargetEntryListForSortClauses);
|
||||
}
|
||||
|
||||
|
||||
|
@ -3100,7 +3063,13 @@ WorkerAggregateExpressionList(Aggref *originalAggregate,
|
|||
Expr *directarg;
|
||||
foreach_ptr(directarg, originalAggregate->aggdirectargs)
|
||||
{
|
||||
if (!IsA(directarg, Const) && !IsA(directarg, Param))
|
||||
/*
|
||||
* The worker aggregation should execute any node that contains any
|
||||
* Var nodes and return the result in the targetlist, so that the
|
||||
* combine query can then fetch the result via remote scan; see
|
||||
* MasterAggregateExpression.
|
||||
*/
|
||||
if (pull_var_clause_default((Node *) directarg) != NIL)
|
||||
{
|
||||
workerAggregateList = lappend(workerAggregateList, directarg);
|
||||
}
|
||||
|
@ -4803,87 +4772,6 @@ WorkerSortClauseList(Node *limitCount, List *groupClauseList, List *sortClauseLi
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* GenerateNewTargetEntriesForSortClauses goes over provided sort clause lists and
|
||||
* creates new target entries if needed to make sure sort clauses have correct
|
||||
* references. The function returns list of new target entries, caller is
|
||||
* responsible to add those target entries to the end of worker target list.
|
||||
*
|
||||
* The function is required because we change the target entry if it contains an
|
||||
* expression having an aggregate operation, or just the AVG aggregate.
|
||||
* Afterwards any order by clause referring to original target entry starts
|
||||
* to point to a wrong expression.
|
||||
*
|
||||
* Note the function modifies SortGroupClause items in sortClauseList,
|
||||
* targetProjectionNumber, and nextSortGroupRefIndex.
|
||||
*/
|
||||
static List *
|
||||
GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
|
||||
List *sortClauseList,
|
||||
AttrNumber *targetProjectionNumber,
|
||||
Index *nextSortGroupRefIndex)
|
||||
{
|
||||
List *createdTargetList = NIL;
|
||||
|
||||
SortGroupClause *sgClause = NULL;
|
||||
foreach_ptr(sgClause, sortClauseList)
|
||||
{
|
||||
TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList);
|
||||
Expr *targetExpr = targetEntry->expr;
|
||||
bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0);
|
||||
bool createNewTargetEntry = false;
|
||||
|
||||
/* we are only interested in target entries containing aggregates */
|
||||
if (!containsAggregate)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the target expression is not an Aggref, it is either an expression
|
||||
* on a single aggregate, or expression containing multiple aggregates.
|
||||
* Worker query mutates these target entries to have a naked target entry
|
||||
* per aggregate function. We want to use original target entries if this
|
||||
* is the case.
|
||||
* If the original target expression is an avg aggref, we also want to use
|
||||
* original target entry.
|
||||
*/
|
||||
if (!IsA(targetExpr, Aggref))
|
||||
{
|
||||
createNewTargetEntry = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
Aggref *aggNode = (Aggref *) targetExpr;
|
||||
AggregateType aggregateType = GetAggregateType(aggNode);
|
||||
if (aggregateType == AGGREGATE_AVERAGE)
|
||||
{
|
||||
createNewTargetEntry = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (createNewTargetEntry)
|
||||
{
|
||||
bool resJunk = true;
|
||||
AttrNumber nextResNo = (*targetProjectionNumber);
|
||||
Expr *newExpr = copyObject(targetExpr);
|
||||
TargetEntry *newTargetEntry = makeTargetEntry(newExpr, nextResNo,
|
||||
targetEntry->resname, resJunk);
|
||||
newTargetEntry->ressortgroupref = *nextSortGroupRefIndex;
|
||||
|
||||
createdTargetList = lappend(createdTargetList, newTargetEntry);
|
||||
|
||||
sgClause->tleSortGroupRef = *nextSortGroupRefIndex;
|
||||
|
||||
(*nextSortGroupRefIndex)++;
|
||||
(*targetProjectionNumber)++;
|
||||
}
|
||||
}
|
||||
|
||||
return createdTargetList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CanPushDownLimitApproximate checks if we can push down the limit clause to
|
||||
* the worker nodes, and get approximate and meaningful results. We can do this
@ -1534,6 +1534,7 @@ MultiTableNodeList(List *tableEntryList, List *rangeTableList)
|
|||
tableNode->partitionColumn = partitionColumn;
|
||||
tableNode->alias = rangeTableEntry->alias;
|
||||
tableNode->referenceNames = rangeTableEntry->eref;
|
||||
tableNode->includePartitions = GetOriginalInh(rangeTableEntry);
|
||||
|
||||
tableNodeList = lappend(tableNodeList, tableNode);
|
||||
}
@ -45,6 +45,7 @@
|
|||
#include "distributed/multi_join_order.h"
|
||||
#include "distributed/multi_logical_optimizer.h"
|
||||
#include "distributed/multi_logical_planner.h"
|
||||
#include "distributed/multi_partitioning_utils.h"
|
||||
#include "distributed/multi_physical_planner.h"
|
||||
#include "distributed/log_utils.h"
|
||||
#include "distributed/pg_dist_partition.h"
|
||||
|
@ -743,6 +744,8 @@ BaseRangeTableList(MultiNode *multiNode)
|
|||
rangeTableEntry->eref = multiTable->referenceNames;
|
||||
rangeTableEntry->alias = multiTable->alias;
|
||||
rangeTableEntry->relid = multiTable->relationId;
|
||||
rangeTableEntry->inh = multiTable->includePartitions;
|
||||
|
||||
SetRangeTblExtraData(rangeTableEntry, CITUS_RTE_RELATION, NULL, NULL,
|
||||
list_make1_int(multiTable->rangeTableId),
|
||||
NIL, NIL, NIL, NIL);
|
||||
|
@ -824,7 +827,21 @@ static List *
|
|||
QueryTargetList(MultiNode *multiNode)
|
||||
{
|
||||
List *projectNodeList = FindNodesOfType(multiNode, T_MultiProject);
|
||||
Assert(list_length(projectNodeList) > 0);
|
||||
if (list_length(projectNodeList) == 0)
|
||||
{
|
||||
/*
|
||||
* The physical planner assumes that all worker queries would have
|
||||
* target list entries based on the fact that at least the columns
|
||||
* on the JOINs have to be on the target list. However, there is
|
||||
* an exception to that if there is a cartesian product join and
|
||||
* there are no additional target list entries belonging to one side
|
||||
* of the JOIN. Once we support cartesian product join, we should
|
||||
* remove this error.
|
||||
*/
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot perform distributed planning on this query"),
|
||||
errdetail("Cartesian products are currently unsupported")));
|
||||
}
|
||||
|
||||
MultiProject *topProjectNode = (MultiProject *) linitial(projectNodeList);
|
||||
List *columnList = topProjectNode->columnList;
|
||||
|
@ -1454,6 +1471,7 @@ ConstructCallingRTE(RangeTblEntry *rangeTableEntry, List *dependentJobList)
|
|||
callingRTE->rtekind = RTE_RELATION;
|
||||
callingRTE->eref = rangeTableEntry->eref;
|
||||
callingRTE->relid = rangeTableEntry->relid;
|
||||
callingRTE->inh = rangeTableEntry->inh;
|
||||
}
|
||||
else if (rangeTableKind == CITUS_RTE_REMOTE_QUERY)
|
||||
{
|
||||
|
@ -4352,16 +4370,8 @@ FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment)
|
|||
Oid relationId = rangeTableEntry->relid;
|
||||
char *relationName = get_rel_name(relationId);
|
||||
|
||||
/*
|
||||
* If the table is not in the default namespace (public), we include it in
|
||||
* the fragment alias.
|
||||
*/
|
||||
Oid schemaId = get_rel_namespace(relationId);
|
||||
schemaName = get_namespace_name(schemaId);
|
||||
if (strncmp(schemaName, "public", NAMEDATALEN) == 0)
|
||||
{
|
||||
schemaName = NULL;
|
||||
}
|
||||
|
||||
aliasName = relationName;
|
||||
@ -555,6 +555,14 @@ ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery,
|
|||
{
|
||||
ListCell *cteCell = NULL;
|
||||
|
||||
/* CTEs still not supported for INSERTs. */
|
||||
if (queryTree->commandType == CMD_INSERT)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"Router planner doesn't support common table expressions with INSERT queries.",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
foreach(cteCell, queryTree->cteList)
|
||||
{
|
||||
CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell);
|
||||
|
@ -562,31 +570,22 @@ ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery,
|
|||
|
||||
if (cteQuery->commandType != CMD_SELECT)
|
||||
{
|
||||
/* Modifying CTEs still not supported for INSERTs & multi shard queries. */
|
||||
if (queryTree->commandType == CMD_INSERT)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"Router planner doesn't support non-select common table expressions with non-select queries.",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
/* Modifying CTEs still not supported for multi shard queries. */
|
||||
if (multiShardQuery)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"Router planner doesn't support non-select common table expressions with multi shard queries.",
|
||||
NULL, NULL);
|
||||
}
|
||||
/* Modifying CTEs exclude both INSERT CTEs & INSERT queries. */
|
||||
else if (cteQuery->commandType == CMD_INSERT)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"Router planner doesn't support INSERT common table expressions.",
|
||||
NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/* Modifying CTEs exclude both INSERT CTEs & INSERT queries. */
|
||||
if (cteQuery->commandType == CMD_INSERT)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"Router planner doesn't support INSERT common table expressions.",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
|
||||
if (cteQuery->hasForUpdate &&
|
||||
FindNodeMatchingCheckFunctionInRangeTableList(cteQuery->rtable,
|
||||
IsReferenceTableRTE))
|
||||
|
@ -2433,7 +2432,7 @@ CreateLocalDummyPlacement()
|
|||
{
|
||||
ShardPlacement *dummyPlacement = CitusMakeNode(ShardPlacement);
|
||||
dummyPlacement->nodeId = LOCAL_NODE_ID;
|
||||
dummyPlacement->nodeName = LOCAL_HOST_NAME;
|
||||
dummyPlacement->nodeName = LocalHostName;
|
||||
dummyPlacement->nodePort = PostPortNumber;
|
||||
dummyPlacement->groupId = GetLocalGroupId();
|
||||
return dummyPlacement;
|
@ -61,6 +61,8 @@ typedef struct AttributeEquivalenceClass
|
|||
{
|
||||
uint32 equivalenceId;
|
||||
List *equivalentAttributes;
|
||||
|
||||
Index unionQueryPartitionKeyIndex;
|
||||
} AttributeEquivalenceClass;
|
||||
|
||||
/*
|
||||
|
@ -83,7 +85,8 @@ typedef struct AttributeEquivalenceClassMember
|
|||
|
||||
|
||||
static bool ContextContainsLocalRelation(RelationRestrictionContext *restrictionContext);
|
||||
static Var * FindUnionAllVar(PlannerInfo *root, List *appendRelList, Oid relationOid,
|
||||
static int RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo);
|
||||
static Var * FindUnionAllVar(PlannerInfo *root, List *translatedVars, Oid relationOid,
|
||||
Index relationRteIndex, Index *partitionKeyIndex);
|
||||
static bool ContainsMultipleDistributedRelations(PlannerRestrictionContext *
|
||||
plannerRestrictionContext);
|
||||
|
@ -91,11 +94,11 @@ static List * GenerateAttributeEquivalencesForRelationRestrictions(
|
|||
RelationRestrictionContext *restrictionContext);
|
||||
static AttributeEquivalenceClass * AttributeEquivalenceClassForEquivalenceClass(
|
||||
EquivalenceClass *plannerEqClass, RelationRestriction *relationRestriction);
|
||||
static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||
static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||
attributeEquivalenceClass,
|
||||
PlannerInfo *root, Var *varToBeAdded);
|
||||
static void AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||
*attributeEquivalenceClass,
|
||||
attributeEquivalenceClass,
|
||||
RangeTblEntry *
|
||||
rangeTableEntry,
|
||||
PlannerInfo *root,
|
||||
|
@ -103,17 +106,17 @@ static void AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass
|
|||
static Query * GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry,
|
||||
Var *varToBeAdded);
|
||||
static void AddUnionAllSetOperationsToAttributeEquivalenceClass(
|
||||
AttributeEquivalenceClass **
|
||||
AttributeEquivalenceClass *
|
||||
attributeEquivalenceClass,
|
||||
PlannerInfo *root,
|
||||
Var *varToBeAdded);
|
||||
static void AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||
static void AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||
attributeEquivalenceClass,
|
||||
PlannerInfo *root,
|
||||
SetOperationStmt *
|
||||
setOperation,
|
||||
Var *varToBeAdded);
|
||||
static void AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||
static void AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||
attrEquivalenceClass,
|
||||
RangeTblEntry *rangeTableEntry,
|
||||
Var *varToBeAdded);
|
||||
|
@ -141,7 +144,7 @@ static AttributeEquivalenceClass * GenerateEquivalenceClassForRelationRestrictio
|
|||
RelationRestrictionContext
|
||||
*
|
||||
relationRestrictionContext);
|
||||
static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass **
|
||||
static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass *
|
||||
firstClass,
|
||||
AttributeEquivalenceClass *
|
||||
secondClass);
|
||||
|
@ -156,9 +159,13 @@ static JoinRestrictionContext * FilterJoinRestrictionContext(
|
|||
static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int
|
||||
rangeTableArrayLength, Relids
|
||||
queryRteIdentities);
|
||||
static int RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo);
|
||||
static Relids QueryRteIdentities(Query *queryTree);
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_13
|
||||
static int ParentCountPriorToAppendRel(List *appendRelList, AppendRelInfo *appendRelInfo);
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * AllDistributionKeysInQueryAreEqual returns true if either
 * (i) there exists join in the query and all relations joined on their

@@ -249,7 +256,7 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
		plannerRestrictionContext->relationRestrictionContext;
	JoinRestrictionContext *joinRestrictionContext =
		plannerRestrictionContext->joinRestrictionContext;
	Index unionQueryPartitionKeyIndex = 0;

	AttributeEquivalenceClass *attributeEquivalence =
		palloc0(sizeof(AttributeEquivalenceClass));
	ListCell *relationRestrictionCell = NULL;

@@ -279,7 +286,8 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
	 */
	if (appendRelList != NULL)
	{
		varToBeAdded = FindUnionAllVar(relationPlannerRoot, appendRelList,
		varToBeAdded = FindUnionAllVar(relationPlannerRoot,
		                               relationRestriction->translatedVars,
		                               relationRestriction->relationId,
		                               relationRestriction->index,
		                               &partitionKeyIndex);

@@ -323,17 +331,17 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
		 * we check whether all the relations have partition keys in the
		 * same position.
		 */
		if (unionQueryPartitionKeyIndex == InvalidAttrNumber)
		if (attributeEquivalence->unionQueryPartitionKeyIndex == InvalidAttrNumber)
		{
			unionQueryPartitionKeyIndex = partitionKeyIndex;
			attributeEquivalence->unionQueryPartitionKeyIndex = partitionKeyIndex;
		}
		else if (unionQueryPartitionKeyIndex != partitionKeyIndex)
		else if (attributeEquivalence->unionQueryPartitionKeyIndex != partitionKeyIndex)
		{
			continue;
		}

		Assert(varToBeAdded != NULL);
		AddToAttributeEquivalenceClass(&attributeEquivalence, relationPlannerRoot,
		AddToAttributeEquivalenceClass(attributeEquivalence, relationPlannerRoot,
		                               varToBeAdded);
	}
|
||||
|
||||
|
@ -373,66 +381,74 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* RangeTableOffsetCompat returns the range table offset(in glob->finalrtable) for the appendRelInfo.
|
||||
* For PG < 13 this is a no op.
|
||||
*/
|
||||
static int
|
||||
RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo)
|
||||
{
|
||||
#if PG_VERSION_NUM >= PG_VERSION_13
|
||||
int parentCount = ParentCountPriorToAppendRel(root->append_rel_list, appendRelInfo);
|
||||
int skipParentCount = parentCount - 1;
|
||||
|
||||
int i = 1;
|
||||
for (; i < root->simple_rel_array_size; i++)
|
||||
{
|
||||
RangeTblEntry *rte = root->simple_rte_array[i];
|
||||
if (rte->inh)
|
||||
{
|
||||
/*
|
||||
* We skip the previous parents because we want to find the offset
|
||||
* for the given append rel info.
|
||||
*/
|
||||
if (skipParentCount > 0)
|
||||
{
|
||||
skipParentCount--;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
int indexInRtable = (i - 1);
|
||||
|
||||
/*
|
||||
* Postgres adds the global rte array size to parent_relid as an offset.
|
||||
* Here we do the reverse operation: Commit on postgres side:
|
||||
* 6ef77cf46e81f45716ec981cb08781d426181378
|
||||
*/
|
||||
int parentRelIndex = appendRelInfo->parent_relid - 1;
|
||||
return parentRelIndex - indexInRtable;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* FindUnionAllVar finds the variable used in union all for the side that has
|
||||
* relationRteIndex as its index and the same varattno as the partition key of
|
||||
* the given relation with relationOid.
|
||||
*/
|
||||
static Var *
|
||||
FindUnionAllVar(PlannerInfo *root, List *appendRelList, Oid relationOid,
|
||||
FindUnionAllVar(PlannerInfo *root, List *translatedVars, Oid relationOid,
|
||||
Index relationRteIndex, Index *partitionKeyIndex)
|
||||
{
|
||||
ListCell *appendRelCell = NULL;
|
||||
AppendRelInfo *targetAppendRelInfo = NULL;
|
||||
AttrNumber childAttrNumber = 0;
|
||||
|
||||
*partitionKeyIndex = 0;
|
||||
|
||||
/* iterate on the queries that are part of UNION ALL subselects */
|
||||
foreach(appendRelCell, appendRelList)
|
||||
{
|
||||
AppendRelInfo *appendRelInfo = (AppendRelInfo *) lfirst(appendRelCell);
|
||||
|
||||
|
||||
int rtoffset = RangeTableOffsetCompat(root, appendRelInfo);
|
||||
|
||||
/*
|
||||
* We're only interested in the child rel that is equal to the
|
||||
* relation we're investigating.
|
||||
*/
|
||||
if (appendRelInfo->child_relid - rtoffset == relationRteIndex)
|
||||
{
|
||||
targetAppendRelInfo = appendRelInfo;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!targetAppendRelInfo)
|
||||
if (!IsCitusTableType(relationOid, STRICTLY_PARTITIONED_DISTRIBUTED_TABLE))
|
||||
{
|
||||
/* we only care about hash and range partitioned tables */
|
||||
*partitionKeyIndex = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Var *relationPartitionKey = DistPartitionKeyOrError(relationOid);
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_13
|
||||
for (; childAttrNumber < targetAppendRelInfo->num_child_cols; childAttrNumber++)
|
||||
{
|
||||
int curAttNo = targetAppendRelInfo->parent_colnos[childAttrNumber];
|
||||
if (curAttNo == relationPartitionKey->varattno)
|
||||
{
|
||||
*partitionKeyIndex = (childAttrNumber + 1);
|
||||
int rtoffset = RangeTableOffsetCompat(root, targetAppendRelInfo);
|
||||
relationPartitionKey->varno = targetAppendRelInfo->child_relid - rtoffset;
|
||||
return relationPartitionKey;
|
||||
}
|
||||
}
|
||||
#else
|
||||
AttrNumber childAttrNumber = 0;
|
||||
*partitionKeyIndex = 0;
|
||||
ListCell *translatedVarCell;
|
||||
List *translaterVars = targetAppendRelInfo->translated_vars;
|
||||
foreach(translatedVarCell, translaterVars)
|
||||
foreach(translatedVarCell, translatedVars)
|
||||
{
|
||||
Node *targetNode = (Node *) lfirst(translatedVarCell);
|
||||
|
||||
childAttrNumber++;
|
||||
|
||||
if (!IsA(targetNode, Var))
|
||||
|
@ -449,7 +465,6 @@ FindUnionAllVar(PlannerInfo *root, List *appendRelList, Oid relationOid,
|
|||
return targetVar;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -580,7 +595,6 @@ GenerateAllAttributeEquivalences(PlannerRestrictionContext *plannerRestrictionCo
|
|||
JoinRestrictionContext *joinRestrictionContext =
|
||||
plannerRestrictionContext->joinRestrictionContext;
|
||||
|
||||
|
||||
/* reset the equivalence id counter per call to prevent overflows */
|
||||
attributeEquivalenceId = 1;
|
||||
|
||||
|
@ -788,14 +802,14 @@ AttributeEquivalenceClassForEquivalenceClass(EquivalenceClass *plannerEqClass,
|
|||
equivalenceParam, &outerNodeRoot);
|
||||
if (expressionVar)
|
||||
{
|
||||
AddToAttributeEquivalenceClass(&attributeEquivalence, outerNodeRoot,
|
||||
AddToAttributeEquivalenceClass(attributeEquivalence, outerNodeRoot,
|
||||
expressionVar);
|
||||
}
|
||||
}
|
||||
else if (IsA(strippedEquivalenceExpr, Var))
|
||||
{
|
||||
expressionVar = (Var *) strippedEquivalenceExpr;
|
||||
AddToAttributeEquivalenceClass(&attributeEquivalence, plannerInfo,
|
||||
AddToAttributeEquivalenceClass(attributeEquivalence, plannerInfo,
|
||||
expressionVar);
|
||||
}
|
||||
}
|
||||
|
@ -978,7 +992,7 @@ GenerateCommonEquivalence(List *attributeEquivalenceList,
|
|||
if (AttributeClassContainsAttributeClassMember(attributeEquialanceMember,
|
||||
commonEquivalenceClass))
|
||||
{
|
||||
ListConcatUniqueAttributeClassMemberLists(&commonEquivalenceClass,
|
||||
ListConcatUniqueAttributeClassMemberLists(commonEquivalenceClass,
|
||||
currentEquivalenceClass);
|
||||
|
||||
addedEquivalenceIds = bms_add_member(addedEquivalenceIds,
|
||||
|
@ -1058,7 +1072,7 @@ GenerateEquivalenceClassForRelationRestriction(
|
|||
* firstClass.
|
||||
*/
|
||||
static void
|
||||
ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass **firstClass,
|
||||
ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass *firstClass,
|
||||
AttributeEquivalenceClass *secondClass)
|
||||
{
|
||||
ListCell *equivalenceClassMemberCell = NULL;
|
||||
|
@ -1069,13 +1083,13 @@ ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass **firstClass
|
|||
AttributeEquivalenceClassMember *newEqMember =
|
||||
(AttributeEquivalenceClassMember *) lfirst(equivalenceClassMemberCell);
|
||||
|
||||
if (AttributeClassContainsAttributeClassMember(newEqMember, *firstClass))
|
||||
if (AttributeClassContainsAttributeClassMember(newEqMember, firstClass))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
(*firstClass)->equivalentAttributes = lappend((*firstClass)->equivalentAttributes,
|
||||
newEqMember);
|
||||
firstClass->equivalentAttributes = lappend(firstClass->equivalentAttributes,
|
||||
newEqMember);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1150,10 +1164,10 @@ GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext *
|
|||
sizeof(AttributeEquivalenceClass));
|
||||
attributeEquivalence->equivalenceId = attributeEquivalenceId++;
|
||||
|
||||
AddToAttributeEquivalenceClass(&attributeEquivalence,
|
||||
AddToAttributeEquivalenceClass(attributeEquivalence,
|
||||
joinRestriction->plannerInfo, leftVar);
|
||||
|
||||
AddToAttributeEquivalenceClass(&attributeEquivalence,
|
||||
AddToAttributeEquivalenceClass(attributeEquivalence,
|
||||
joinRestriction->plannerInfo, rightVar);
|
||||
|
||||
attributeEquivalenceList =
|
||||
|
@ -1194,7 +1208,7 @@ GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext *
|
|||
* equivalence class
|
||||
*/
|
||||
static void
|
||||
AddToAttributeEquivalenceClass(AttributeEquivalenceClass **attributeEquivalenceClass,
|
||||
AddToAttributeEquivalenceClass(AttributeEquivalenceClass *attributeEquivalenceClass,
|
||||
PlannerInfo *root, Var *varToBeAdded)
|
||||
{
|
||||
/* punt if it's a whole-row var rather than a plain column reference */
|
||||
|
@ -1233,9 +1247,10 @@ AddToAttributeEquivalenceClass(AttributeEquivalenceClass **attributeEquivalenceC
|
|||
*/
|
||||
static void
|
||||
AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass
|
||||
**attributeEquivalenceClass,
|
||||
*attributeEquivalenceClass,
|
||||
RangeTblEntry *rangeTableEntry,
|
||||
PlannerInfo *root, Var *varToBeAdded)
|
||||
PlannerInfo *root,
|
||||
Var *varToBeAdded)
|
||||
{
|
||||
RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno);
|
||||
Query *targetSubquery = GetTargetSubquery(root, rangeTableEntry, varToBeAdded);
|
||||
|
@ -1355,7 +1370,7 @@ GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry, Var *varToB
|
|||
* var the given equivalence class.
|
||||
*/
|
||||
static void
|
||||
AddUnionAllSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||
AddUnionAllSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||
attributeEquivalenceClass,
|
||||
PlannerInfo *root,
|
||||
Var *varToBeAdded)
|
||||
|
@ -1377,41 +1392,101 @@ AddUnionAllSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
|||
continue;
|
||||
}
|
||||
int rtoffset = RangeTableOffsetCompat(root, appendRelInfo);
|
||||
int childRelId = appendRelInfo->child_relid - rtoffset;
|
||||
|
||||
/* set the varno accordingly for this specific child */
|
||||
varToBeAdded->varno = appendRelInfo->child_relid - rtoffset;
|
||||
if (root->simple_rel_array_size <= childRelId)
|
||||
{
|
||||
/* we prefer to return over an Assert or error to be defensive */
|
||||
return;
|
||||
}
|
||||
|
||||
AddToAttributeEquivalenceClass(attributeEquivalenceClass, root,
|
||||
varToBeAdded);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RangeTableOffsetCompat returns the range table offset(in glob->finalrtable) for the appendRelInfo.
|
||||
* For PG < 13 this is a no op.
|
||||
*/
|
||||
static int
|
||||
RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo)
|
||||
{
|
||||
#if PG_VERSION_NUM >= PG_VERSION_13
|
||||
int i = 1;
|
||||
for (; i < root->simple_rel_array_size; i++)
|
||||
{
|
||||
RangeTblEntry *rte = root->simple_rte_array[i];
|
||||
RangeTblEntry *rte = root->simple_rte_array[childRelId];
|
||||
if (rte->inh)
|
||||
{
|
||||
break;
|
||||
/*
|
||||
* This code-path may require improvements. If a leaf of a UNION ALL
|
||||
* (e.g., an entry in appendRelList) itself is another UNION ALL
|
||||
* (e.g., rte->inh = true), the logic here might get into an infinite
|
||||
* recursion.
|
||||
*
|
||||
* The downside of "continue" here is that certain UNION ALL queries
|
||||
* that are safe to pushdown may not be pushed down.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
else if (rte->rtekind == RTE_RELATION)
|
||||
{
|
||||
Index partitionKeyIndex = 0;
|
||||
List *translatedVars = TranslatedVarsForRteIdentity(GetRTEIdentity(rte));
|
||||
Var *varToBeAddedOnUnionAllSubquery =
|
||||
FindUnionAllVar(root, translatedVars, rte->relid, childRelId,
|
||||
&partitionKeyIndex);
|
||||
if (partitionKeyIndex == 0)
|
||||
{
|
||||
/* no partition key on the target list */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (attributeEquivalenceClass->unionQueryPartitionKeyIndex == 0)
|
||||
{
|
||||
/* the first partition key index we found */
|
||||
attributeEquivalenceClass->unionQueryPartitionKeyIndex =
|
||||
partitionKeyIndex;
|
||||
}
|
||||
else if (attributeEquivalenceClass->unionQueryPartitionKeyIndex !=
|
||||
partitionKeyIndex)
|
||||
{
|
||||
/*
|
||||
* Partition keys on the leaves of the UNION ALL queries on
|
||||
* different ordinal positions. We cannot pushdown, so skip.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
if (varToBeAddedOnUnionAllSubquery != NULL)
|
||||
{
|
||||
AddToAttributeEquivalenceClass(attributeEquivalenceClass, root,
|
||||
varToBeAddedOnUnionAllSubquery);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* set the varno accordingly for this specific child */
|
||||
varToBeAdded->varno = childRelId;
|
||||
|
||||
AddToAttributeEquivalenceClass(attributeEquivalenceClass, root,
|
||||
varToBeAdded);
|
||||
}
|
||||
}
|
||||
int indexInRtable = (i - 1);
|
||||
return appendRelInfo->parent_relid - 1 - (indexInRtable);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_13
|
||||
|
||||
/*
|
||||
* ParentCountPriorToAppendRel returns the number of parents that come before
|
||||
* the given append rel info.
|
||||
*/
|
||||
static int
|
||||
ParentCountPriorToAppendRel(List *appendRelList, AppendRelInfo *targetAppendRelInfo)
|
||||
{
|
||||
int targetParentIndex = targetAppendRelInfo->parent_relid;
|
||||
Bitmapset *parent_ids = NULL;
|
||||
AppendRelInfo *appendRelInfo = NULL;
|
||||
foreach_ptr(appendRelInfo, appendRelList)
|
||||
{
|
||||
int curParentIndex = appendRelInfo->parent_relid;
|
||||
if (curParentIndex <= targetParentIndex)
|
||||
{
|
||||
parent_ids = bms_add_member(parent_ids, curParentIndex);
|
||||
}
|
||||
}
|
||||
return bms_num_members(parent_ids);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* AddUnionSetOperationsToAttributeEquivalenceClass recursively iterates on all the
|
||||
* setOperations and adds each corresponding target entry to the given equivalence
|
||||
|
@ -1422,7 +1497,7 @@ RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo)
|
|||
* messages.
|
||||
*/
|
||||
static void
|
||||
AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||
AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||
attributeEquivalenceClass,
|
||||
PlannerInfo *root,
|
||||
SetOperationStmt *setOperation,
|
||||
|
@ -1450,7 +1525,7 @@ AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
|||
* the input rte to be an RTE_RELATION.
|
||||
*/
|
||||
static void
|
||||
AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||
AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||
attrEquivalenceClass,
|
||||
RangeTblEntry *rangeTableEntry,
|
||||
Var *varToBeAdded)
|
||||
|
@ -1487,8 +1562,8 @@ AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
|||
attributeEqMember->rteIdentity = GetRTEIdentity(rangeTableEntry);
|
||||
attributeEqMember->relationId = rangeTableEntry->relid;
|
||||
|
||||
(*attrEquivalenceClass)->equivalentAttributes =
|
||||
lappend((*attrEquivalenceClass)->equivalentAttributes,
|
||||
attrEquivalenceClass->equivalentAttributes =
|
||||
lappend(attrEquivalenceClass->equivalentAttributes,
|
||||
attributeEqMember);
|
||||
}
|
||||
|
||||
|
|
|
@ -1575,6 +1575,22 @@ LowerShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
|
|||
/* setup partitionColumnValue argument once */
|
||||
fcSetArg(compareFunction, 0, partitionColumnValue);
|
||||
|
||||
/*
|
||||
* Now we test partitionColumnValue used in where clause such as
|
||||
* partCol > partitionColumnValue (or partCol >= partitionColumnValue)
|
||||
* against four possibilities, these are:
|
||||
* 1) partitionColumnValue falls into a specific shard, such that:
|
||||
* partitionColumnValue >= shard[x].min, and
|
||||
* partitionColumnValue < shard[x].max (or partitionColumnValue <= shard[x].max).
|
||||
* 2) partitionColumnValue < shard[x].min for all the shards
|
||||
* 3) partitionColumnValue > shard[x].max for all the shards
|
||||
* 4) partitionColumnValue falls in between two shards, such that:
|
||||
* partitionColumnValue > shard[x].max and
|
||||
* partitionColumnValue < shard[x+1].min
|
||||
*
|
||||
* For 1), we find that shard in below loop using binary search and
|
||||
* return the index of it. For the others, see the end of this function.
|
||||
*/
|
||||
while (lowerBoundIndex < upperBoundIndex)
|
||||
{
|
||||
int middleIndex = lowerBoundIndex + ((upperBoundIndex - lowerBoundIndex) / 2);
|
||||
|
@ -1607,7 +1623,7 @@ LowerShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
|
|||
continue;
|
||||
}
|
||||
|
||||
/* found interval containing partitionValue */
|
||||
/* partitionColumnValue falls into a specific shard, possibility 1) */
|
||||
return middleIndex;
|
||||
}
|
||||
|
||||
|
@ -1618,20 +1634,30 @@ LowerShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
|
|||
* (we'd have hit the return middleIndex; case otherwise). Figure out
|
||||
* whether there's possibly any interval containing a value that's bigger
|
||||
* than the partition key one.
|
||||
*
|
||||
* Also note that we initialized lowerBoundIndex with 0. Similarly,
|
||||
* we always set it to the index of the shard that we consider as our
|
||||
* lower boundary during binary search.
|
||||
*/
|
||||
if (lowerBoundIndex == 0)
|
||||
if (lowerBoundIndex == shardCount)
|
||||
{
|
||||
/* all intervals are bigger, thus return 0 */
|
||||
return 0;
|
||||
}
|
||||
else if (lowerBoundIndex == shardCount)
|
||||
{
|
||||
/* partition value is bigger than all partition values */
|
||||
/*
|
||||
* Since lowerBoundIndex is an inclusive index, being equal to shardCount
|
||||
* means all the shards have smaller values than partitionColumnValue,
|
||||
* which corresponds to possibility 3).
|
||||
* In that case, since we can't have a lower bound shard, we return
|
||||
* INVALID_SHARD_INDEX here.
|
||||
*/
|
||||
return INVALID_SHARD_INDEX;
|
||||
}
|
||||
|
||||
/* value falls inbetween intervals */
|
||||
return lowerBoundIndex + 1;
|
||||
/*
|
||||
* partitionColumnValue is either smaller than all the shards or falls in
|
||||
* between two shards, which corresponds to possibility 2) or 4).
|
||||
* Knowing that lowerBoundIndex is an inclusive index, we directly return
|
||||
* it as the index for the lower bound shard here.
|
||||
*/
|
||||
return lowerBoundIndex;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1651,6 +1677,23 @@ UpperShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
|
|||
/* setup partitionColumnValue argument once */
|
||||
fcSetArg(compareFunction, 0, partitionColumnValue);
|
||||
|
||||
/*
|
||||
* Now we test partitionColumnValue used in where clause such as
|
||||
* partCol < partitionColumnValue (or partCol <= partitionColumnValue)
|
||||
* against four possibilities, these are:
|
||||
* 1) partitionColumnValue falls into a specific shard, such that:
|
||||
* partitionColumnValue <= shard[x].max, and
|
||||
* partitionColumnValue > shard[x].min (or partitionColumnValue >= shard[x].min).
|
||||
* 2) partitionColumnValue > shard[x].max for all the shards
|
||||
* 3) partitionColumnValue < shard[x].min for all the shards
|
||||
* 4) partitionColumnValue falls in between two shards, such that:
|
||||
* partitionColumnValue > shard[x].max and
|
||||
* partitionColumnValue < shard[x+1].min
|
||||
*
|
||||
* For 1), we find that shard in below loop using binary search and
|
||||
* return the index of it. For the others, see the end of this function.
|
||||
*/
|
||||
|
||||
while (lowerBoundIndex < upperBoundIndex)
|
||||
{
|
||||
int middleIndex = lowerBoundIndex + ((upperBoundIndex - lowerBoundIndex) / 2);
|
||||
|
@ -1683,7 +1726,7 @@ UpperShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
|
|||
continue;
|
||||
}
|
||||
|
||||
/* found interval containing partitionValue */
|
||||
/* partitionColumnValue falls into a specific shard, possibility 1) */
|
||||
return middleIndex;
|
||||
}
|
||||
|
||||
|
@ -1694,19 +1737,29 @@ UpperShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
|
|||
* (we'd have hit the return middleIndex; case otherwise). Figure out
|
||||
* whether there's possibly any interval containing a value that's smaller
|
||||
* than the partition key one.
|
||||
*
|
||||
* Also note that we initialized upperBoundIndex with shardCount. Similarly,
|
||||
* we always set it to the index of the next shard that we consider as our
|
||||
* upper boundary during binary search.
|
||||
*/
|
||||
if (upperBoundIndex == shardCount)
|
||||
if (upperBoundIndex == 0)
|
||||
{
|
||||
/* all intervals are smaller, thus return 0 */
|
||||
return shardCount - 1;
|
||||
}
|
||||
else if (upperBoundIndex == 0)
|
||||
{
|
||||
/* partition value is smaller than all partition values */
|
||||
/*
|
||||
* Since upperBoundIndex is an exclusive index, being equal to 0 means
|
||||
* all the shards have greater values than partitionColumnValue, which
|
||||
* corresponds to possibility 3).
|
||||
* In that case, since we can't have an upper bound shard, we return
|
||||
* INVALID_SHARD_INDEX here.
|
||||
*/
|
||||
return INVALID_SHARD_INDEX;
|
||||
}
|
||||
|
||||
/* value falls inbetween intervals, return the inverval one smaller as bound */
|
||||
/*
|
||||
* partitionColumnValue is either greater than all the shards or falls in
|
||||
* between two shards, which corresponds to possibility 2) or 4).
|
||||
* Knowing that upperBoundIndex is an exclusive index, we return the index
|
||||
* for the previous shard here.
|
||||
*/
|
||||
return upperBoundIndex - 1;
|
||||
}
|
||||
|
||||
|
|
|
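The reworked boundary searches above enumerate four cases for where a filter value can land against the sorted shard ranges, and now return INVALID_SHARD_INDEX when no shard can serve as the bound. A small self-contained model of the lower-bound half of that logic over integer intervals, assuming non-overlapping intervals sorted by minimum; this is an illustration, not the Citus implementation:

#include <stdio.h>

#define INVALID_SHARD_INDEX (-1)

typedef struct IntervalSketch
{
	int min;
	int max;                 /* inclusive upper bound of the interval */
} IntervalSketch;

/*
 * Smallest index whose interval could still satisfy "value <= max", i.e. the
 * lower bound shard for a "partCol >= value" style filter. Returns
 * INVALID_SHARD_INDEX when every interval lies below value.
 */
static int
LowerBoundIndex(int value, IntervalSketch *intervals, int count)
{
	int low = 0;
	int high = count;

	while (low < high)
	{
		int middle = low + (high - low) / 2;

		if (value > intervals[middle].max)
		{
			low = middle + 1;        /* middle and everything before it are too small */
		}
		else if (value < intervals[middle].min)
		{
			high = middle;           /* answer is middle or something before it */
		}
		else
		{
			return middle;           /* possibility 1): value falls inside this interval */
		}
	}

	/* low == count means all intervals are below value: possibility 3) */
	return (low == count) ? INVALID_SHARD_INDEX : low;
}

int
main(void)
{
	IntervalSketch shards[] = { { 0, 9 }, { 10, 19 }, { 20, 29 } };

	printf("%d %d %d\n",
	       LowerBoundIndex(15, shards, 3),    /* 1: inside the second interval */
	       LowerBoundIndex(-5, shards, 3),    /* 0: smaller than all shards, possibility 2) */
	       LowerBoundIndex(42, shards, 3));   /* -1: larger than all shards, possibility 3) */
	return 0;
}
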
@ -27,18 +27,16 @@ static ProgressMonitorData * MonitorDataFromDSMHandle(dsm_handle dsmHandle,
|
|||
|
||||
|
||||
/*
|
||||
* CreateProgressMonitor is used to create a place to store progress information related
|
||||
* to long running processes. The function creates a dynamic shared memory segment
|
||||
* consisting of a header regarding to the process and an array of "steps" that the long
|
||||
* running "operations" consists of. The handle of the dynamic shared memory is stored in
|
||||
* pg_stat_get_progress_info output, to be parsed by a progress retrieval command
|
||||
* later on. This behavior may cause unrelated (but hopefully harmless) rows in
|
||||
* pg_stat_progress_vacuum output. The caller of this function should provide a magic
|
||||
* number, a unique 64 bit unsigned integer, to distinguish different types of commands.
|
||||
* CreateProgressMonitor is used to create a place to store progress
|
||||
* information related to long running processes. The function creates a
|
||||
* dynamic shared memory segment consisting of a header regarding to the
|
||||
* process and an array of "steps" that the long running "operations" consists
|
||||
* of. After initializing the data in the array of steps, the shared memory
|
||||
* segment can be shared with other processes using RegisterProgressMonitor, by
|
||||
* giving it the value that's written to the dsmHandle argument.
|
||||
*/
|
||||
ProgressMonitorData *
|
||||
CreateProgressMonitor(uint64 progressTypeMagicNumber, int stepCount, Size stepSize,
|
||||
Oid relationId)
|
||||
CreateProgressMonitor(int stepCount, Size stepSize, dsm_handle *dsmHandle)
|
||||
{
|
||||
if (stepSize <= 0 || stepCount <= 0)
|
||||
{
|
||||
|
@ -58,20 +56,37 @@ CreateProgressMonitor(uint64 progressTypeMagicNumber, int stepCount, Size stepSi
|
|||
return NULL;
|
||||
}
|
||||
|
||||
dsm_handle dsmHandle = dsm_segment_handle(dsmSegment);
|
||||
*dsmHandle = dsm_segment_handle(dsmSegment);
|
||||
|
||||
ProgressMonitorData *monitor = MonitorDataFromDSMHandle(dsmHandle, &dsmSegment);
|
||||
ProgressMonitorData *monitor = MonitorDataFromDSMHandle(*dsmHandle, &dsmSegment);
|
||||
|
||||
monitor->stepCount = stepCount;
|
||||
monitor->processId = MyProcPid;
|
||||
return monitor;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RegisterProgressMonitor shares dsmHandle with other postgres process by
|
||||
* storing it in pg_stat_get_progress_info output, to be parsed by a
|
||||
* progress retrieval command later on. This behavior may cause unrelated (but
|
||||
* hopefully harmless) rows in pg_stat_progress_vacuum output. The caller of
|
||||
* this function should provide a magic number, a unique 64 bit unsigned
|
||||
* integer, to distinguish different types of commands.
|
||||
*
|
||||
* IMPORTANT: After registering the progress monitor, all modification to the
|
||||
* data should be done using concurrency safe operations (i.e. locks and
|
||||
* atomics)
|
||||
*/
|
||||
void
|
||||
RegisterProgressMonitor(uint64 progressTypeMagicNumber, Oid relationId,
|
||||
dsm_handle dsmHandle)
|
||||
{
|
||||
pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM, relationId);
|
||||
pgstat_progress_update_param(1, dsmHandle);
|
||||
pgstat_progress_update_param(0, progressTypeMagicNumber);
|
||||
|
||||
currentProgressDSMHandle = dsmHandle;
|
||||
|
||||
return monitor;
|
||||
}
|
||||
|
||||
|
||||
|
@ -204,24 +219,46 @@ ProgressMonitorData *
|
|||
MonitorDataFromDSMHandle(dsm_handle dsmHandle, dsm_segment **attachedSegment)
|
||||
{
|
||||
dsm_segment *dsmSegment = dsm_find_mapping(dsmHandle);
|
||||
ProgressMonitorData *monitor = NULL;
|
||||
|
||||
if (dsmSegment == NULL)
|
||||
{
|
||||
dsmSegment = dsm_attach(dsmHandle);
|
||||
}
|
||||
|
||||
if (dsmSegment != NULL)
|
||||
if (dsmSegment == NULL)
|
||||
{
|
||||
monitor = (ProgressMonitorData *) dsm_segment_address(dsmSegment);
|
||||
monitor->steps = (void *) (monitor + 1);
|
||||
*attachedSegment = dsmSegment;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ProgressMonitorData *monitor = (ProgressMonitorData *) dsm_segment_address(
|
||||
dsmSegment);
|
||||
|
||||
*attachedSegment = dsmSegment;
|
||||
|
||||
return monitor;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ProgressMonitorSteps returns a pointer to the array of steps that are stored
|
||||
* in a progress monitor. This is simply the data right after the header, so
|
||||
* this function is trivial. The main purpose of this function is to make the
|
||||
* intent clear to readers of the code.
|
||||
*
|
||||
* NOTE: The pointer this function returns is explicitly not stored in the
|
||||
* header, because the header is shared between processes. The absolute pointer
|
||||
* to the steps can have a different value between processes though, because
|
||||
* the same piece of shared memory often has a different address in different
|
||||
* processes. So we calculate this pointer over and over to make sure we use
|
||||
* the right value for each process.
|
||||
*/
|
||||
void *
|
||||
ProgressMonitorSteps(ProgressMonitorData *monitor)
|
||||
{
|
||||
return monitor + 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DetachFromDSMSegments ensures that the process is detached from all of the segments in
|
||||
* the given list.
|
||||
|
|
|
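The progress-monitor refactor above splits creating the dynamic shared memory segment (CreateProgressMonitor, which now hands the dsm_handle back through an output argument and can return NULL) from publishing it through pg_stat_get_progress_info (RegisterProgressMonitor), with ProgressMonitorSteps recomputing the per-process pointer to the step array. A rough caller-side sketch of how the pieces appear to fit together; the step struct, magic number, and header path are assumptions for illustration:

/* illustrative only: step layout and magic number are made up for this sketch */
#include "postgres.h"
#include "storage/dsm.h"
#include "distributed/multi_progress.h"   /* assumed header for the functions above */

typedef struct StepSketch
{
	uint64 shardId;
	uint64 rowsCopied;
} StepSketch;

#define SKETCH_OPERATION_MAGIC UINT64CONST(0xC17500000000BEEF)

static void
TrackThreeSteps(Oid relationId)
{
	dsm_handle dsmHandle = 0;
	ProgressMonitorData *monitor =
		CreateProgressMonitor(3, sizeof(StepSketch), &dsmHandle);

	if (monitor == NULL)
	{
		return;                       /* creation can fail, as in the hunk above */
	}

	/* fill the steps before any other process can see the segment */
	StepSketch *steps = (StepSketch *) ProgressMonitorSteps(monitor);
	for (int stepIndex = 0; stepIndex < 3; stepIndex++)
	{
		steps[stepIndex].shardId = 0;
		steps[stepIndex].rowsCopied = 0;
	}

	/*
	 * Only now publish the handle; afterwards all updates must use
	 * concurrency-safe operations, per the comment on RegisterProgressMonitor.
	 */
	RegisterProgressMonitor(SKETCH_OPERATION_MAGIC, relationId, dsmHandle);
}
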
@ -556,30 +556,6 @@ RelayEventExtendNames(Node *parseTree, char *schemaName, uint64 shardId)
|
|||
|
||||
AppendShardIdToName(oldRelationName, shardId);
|
||||
AppendShardIdToName(newRelationName, shardId);
|
||||
|
||||
/*
|
||||
* PostgreSQL creates array types for each ordinary table, with
|
||||
* the same name plus a prefix of '_'.
|
||||
*
|
||||
* ALTER TABLE ... RENAME TO ... also renames the underlying
|
||||
* array type, and the DDL is run in parallel connections over
|
||||
* all the placements and shards at once. Concurrent access
|
||||
* here deadlocks.
|
||||
*
|
||||
* Let's provide an easier to understand error message here
|
||||
* than the deadlock one.
|
||||
*
|
||||
* See also https://github.com/citusdata/citus/issues/1664
|
||||
*/
|
||||
int newRelationNameLength = strlen(*newRelationName);
|
||||
if (newRelationNameLength >= (NAMEDATALEN - 1))
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NAME_TOO_LONG),
|
||||
errmsg(
|
||||
"shard name %s exceeds %d characters",
|
||||
*newRelationName, NAMEDATALEN - 1)));
|
||||
}
|
||||
}
|
||||
else if (objectType == OBJECT_COLUMN)
|
||||
{
|
||||
|
|
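The removed block above rejected shard names that would overflow NAMEDATALEN once the shard id suffix is appended; the deadlock the comment describes comes from concurrently renaming the implicit array type of every placement. A toy illustration of the length rule, using a hypothetical formatting helper rather than Citus' AppendShardIdToName:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NAMEDATALEN 64   /* PostgreSQL identifiers are limited to NAMEDATALEN - 1 bytes */

/* hypothetical stand-in for AppendShardIdToName: relname -> relname_<shardId> */
static int
BuildShardName(char *output, size_t outputSize, const char *relationName, uint64_t shardId)
{
	int written = snprintf(output, outputSize, "%s_%lu", relationName,
	                       (unsigned long) shardId);

	if (written < 0 || written >= (NAMEDATALEN - 1))
	{
		return -1;   /* would exceed NAMEDATALEN - 1, mirroring the ereport(ERROR) above */
	}
	return 0;
}

int
main(void)
{
	char shardName[NAMEDATALEN];

	if (BuildShardName(shardName, sizeof(shardName), "events", 102008) == 0)
	{
		printf("%s\n", shardName);            /* events_102008 */
	}

	char longName[80];
	memset(longName, 'x', sizeof(longName) - 1);
	longName[sizeof(longName) - 1] = '\0';

	if (BuildShardName(shardName, sizeof(shardName), longName, 102008) != 0)
	{
		printf("shard name would exceed %d characters\n", NAMEDATALEN - 1);
	}
	return 0;
}
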
|
@ -701,6 +701,19 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_NO_SHOW_ALL,
|
||||
NoticeIfSubqueryPushdownEnabled, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"citus.remote_copy_flush_threshold",
|
||||
gettext_noop("Sets the threshold for remote copy to be flushed."),
|
||||
gettext_noop("When sending data over remote connections via the COPY protocol, "
|
||||
"bytes are first buffered internally by libpq. If the number of "
|
||||
"bytes buffered exceeds the threshold, Citus waits for all the "
|
||||
"bytes to flush."),
|
||||
&RemoteCopyFlushThreshold,
|
||||
8 * 1024 * 1024, 0, INT_MAX,
|
||||
PGC_USERSET,
|
||||
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"citus.local_copy_flush_threshold",
|
||||
gettext_noop("Sets the threshold for local copy to be flushed."),
|
||||
|
@ -1238,6 +1251,16 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"citus.max_cached_connection_lifetime",
|
||||
gettext_noop("Sets the maximum lifetime of cached connections to other nodes."),
|
||||
NULL,
|
||||
&MaxCachedConnectionLifetime,
|
||||
10 * MS_PER_MINUTE, -1, INT_MAX,
|
||||
PGC_USERSET,
|
||||
GUC_UNIT_MS | GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"citus.repartition_join_bucket_count_per_node",
|
||||
gettext_noop("Sets the bucket size for repartition joins per node"),
|
||||
|
@ -1454,6 +1477,19 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomStringVariable(
|
||||
"citus.local_hostname",
|
||||
gettext_noop("Sets the hostname when connecting back to itself."),
|
||||
gettext_noop("For some operations nodes, mostly the coordinator, connect back to "
|
||||
"itself. When configuring SSL certificates it sometimes is required "
|
||||
"to use a specific hostname to match the CN of the certificate when "
|
||||
"verify-full is used."),
|
||||
&LocalHostName,
|
||||
"localhost",
|
||||
PGC_SUSET,
|
||||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.writable_standby_coordinator",
|
||||
gettext_noop("Enables simple DML via a streaming replica of the coordinator"),
|
||||
|
|
|
@@ -0,0 +1,5 @@
-- citus--10.0-1--10.0-2

#include "../../columnar/sql/columnar--10.0-1--10.0-2.sql"

GRANT SELECT ON public.citus_tables TO public;
|
|
@@ -0,0 +1,18 @@
-- citus--10.0-2--10.0-3

#include "udfs/citus_update_table_statistics/10.0-3.sql"

CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
RETURNS VOID
LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
COMMENT ON FUNCTION pg_catalog.master_update_table_statistics(regclass)
IS 'updates shard statistics of the given table';

CREATE OR REPLACE FUNCTION pg_catalog.citus_get_active_worker_nodes(OUT node_name text, OUT node_port bigint)
RETURNS SETOF record
LANGUAGE C STRICT ROWS 100
AS 'MODULE_PATHNAME', $$citus_get_active_worker_nodes$$;
COMMENT ON FUNCTION pg_catalog.citus_get_active_worker_nodes()
IS 'fetch set of active worker nodes';
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
-- citus--10.0-3--10.0-4
|
||||
|
||||
-- This migration file aims to fix 2 issues with upgrades on clusters
|
||||
|
||||
-- 1. a bug in public schema dependency for citus_tables view.
|
||||
--
|
||||
-- Users who do not have public schema in their clusters were unable to upgrade
|
||||
-- to Citus 10.x due to the citus_tables view that used to be created in public
|
||||
-- schema
|
||||
|
||||
#include "udfs/citus_tables/10.0-4.sql"
|
||||
|
||||
-- 2. a bug in our PG upgrade functions
|
||||
--
|
||||
-- Users who took the 9.5-2--10.0-1 upgrade path already have the fix, but users
|
||||
-- who took the 9.5-1--10.0-1 upgrade path do not. Hence, we repeat the CREATE OR
|
||||
-- REPLACE from the 9.5-2 definition for citus_prepare_pg_upgrade.
|
||||
|
||||
#include "udfs/citus_prepare_pg_upgrade/9.5-2.sql"
|
||||
#include "udfs/citus_finish_pg_upgrade/10.0-4.sql"
|
|
@ -0,0 +1,3 @@
|
|||
-- 9.4-1--9.4-2 was added later as a patch to fix a bug in our PG upgrade functions
|
||||
#include "udfs/citus_prepare_pg_upgrade/9.4-2.sql"
|
||||
#include "udfs/citus_finish_pg_upgrade/9.4-2.sql"
|
|
@ -0,0 +1,9 @@
|
|||
--
|
||||
-- 9.4-1--9.4-2 was added later as a patch to fix a bug in our PG upgrade functions
|
||||
--
|
||||
-- This script brings users who installed the patch released back to the 9.4-1
|
||||
-- upgrade path. We do this via a semantical downgrade since there has already been
|
||||
-- introduced new changes in the schema from 9.4-1 to 9.5-1. To make sure we include all
|
||||
-- changes made during that version change we decide to use the existing upgrade path from
|
||||
-- our later introduced 9.4-2 version.
|
||||
--
|
|
@ -0,0 +1,7 @@
|
|||
-- 9.4-2--9.4-3 was added later as a patch to improve master_update_table_statistics
|
||||
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
|
||||
RETURNS VOID
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
|
||||
COMMENT ON FUNCTION pg_catalog.master_update_table_statistics(regclass)
|
||||
IS 'updates shard statistics of the given table';
|
|
@ -0,0 +1,22 @@
|
|||
-- citus--9.4-3--9.4-2
|
||||
-- This is a downgrade path that will revert the changes made in citus--9.4-2--9.4-3.sql
|
||||
-- 9.4-2--9.4-3 was added later as a patch to improve master_update_table_statistics.
|
||||
-- We have this downgrade script so that we can continue from the main upgrade path
|
||||
-- when upgrading to later versions.
|
||||
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
|
||||
RETURNS VOID AS $$
|
||||
DECLARE
|
||||
colocated_tables regclass[];
|
||||
BEGIN
|
||||
SELECT get_colocated_table_array(relation) INTO colocated_tables;
|
||||
|
||||
PERFORM
|
||||
master_update_shard_statistics(shardid)
|
||||
FROM
|
||||
pg_dist_shard
|
||||
WHERE
|
||||
logicalrelid = ANY (colocated_tables);
|
||||
END;
|
||||
$$ LANGUAGE 'plpgsql';
|
||||
COMMENT ON FUNCTION master_update_table_statistics(regclass)
|
||||
IS 'updates shard statistics of the given table and its colocated tables';
|
|
@ -1,10 +1,16 @@
|
|||
-- citus--9.5-1--10.0-1
|
||||
-- citus--9.5-1--10.0-4
|
||||
|
||||
-- This migration file aims to fix the issues with upgrades on clusters without public schema.
|
||||
|
||||
-- This file is created by the following command, and some more changes in a separate commit
|
||||
-- cat citus--9.5-1--10.0-1.sql citus--10.0-1--10.0-2.sql citus--10.0-2--10.0-3.sql > citus--9.5-1--10.0-4.sql
|
||||
|
||||
-- copy of citus--9.5-1--10.0-1
|
||||
|
||||
DROP FUNCTION pg_catalog.upgrade_to_reference_table(regclass);
|
||||
DROP FUNCTION IF EXISTS pg_catalog.citus_total_relation_size(regclass);
|
||||
|
||||
#include "udfs/citus_total_relation_size/10.0-1.sql"
|
||||
#include "udfs/citus_tables/10.0-1.sql"
|
||||
#include "udfs/citus_finish_pg_upgrade/10.0-1.sql"
|
||||
#include "udfs/alter_distributed_table/10.0-1.sql"
|
||||
#include "udfs/alter_table_set_access_method/10.0-1.sql"
|
||||
|
@ -164,4 +170,48 @@ SELECT * FROM pg_catalog.citus_worker_stat_activity();
|
|||
ALTER VIEW citus.citus_worker_stat_activity SET SCHEMA pg_catalog;
|
||||
GRANT SELECT ON pg_catalog.citus_worker_stat_activity TO PUBLIC;
|
||||
|
||||
-- copy of citus--10.0-1--10.0-2
|
||||
|
||||
#include "../../columnar/sql/columnar--10.0-1--10.0-2.sql"
|
||||
|
||||
-- copy of citus--10.0-2--10.0-3
|
||||
|
||||
#include "udfs/citus_update_table_statistics/10.0-3.sql"
|
||||
|
||||
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
|
||||
RETURNS VOID
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
|
||||
COMMENT ON FUNCTION pg_catalog.master_update_table_statistics(regclass)
|
||||
IS 'updates shard statistics of the given table';
|
||||
|
||||
CREATE OR REPLACE FUNCTION pg_catalog.citus_get_active_worker_nodes(OUT node_name text, OUT node_port bigint)
|
||||
RETURNS SETOF record
|
||||
LANGUAGE C STRICT ROWS 100
|
||||
AS 'MODULE_PATHNAME', $$citus_get_active_worker_nodes$$;
|
||||
COMMENT ON FUNCTION pg_catalog.citus_get_active_worker_nodes()
|
||||
IS 'fetch set of active worker nodes';
|
||||
|
||||
-- copy of citus--10.0-3--10.0-4
|
||||
|
||||
-- This migration file aims to fix 2 issues with upgrades on clusters
|
||||
|
||||
-- 1. a bug in public schema dependency for citus_tables view.
|
||||
--
|
||||
-- Users who do not have public schema in their clusters were unable to upgrade
|
||||
-- to Citus 10.x due to the citus_tables view that used to be created in public
|
||||
-- schema
|
||||
|
||||
#include "udfs/citus_tables/10.0-4.sql"
|
||||
|
||||
-- 2. a bug in our PG upgrade functions
|
||||
--
|
||||
-- Users who took the 9.5-2--10.0-1 upgrade path already have the fix, but users
|
||||
-- who took the 9.5-1--10.0-1 upgrade path do not. Hence, we repeat the CREATE OR
|
||||
-- REPLACE from the 9.5-2 definition for citus_prepare_pg_upgrade.
|
||||
|
||||
#include "udfs/citus_prepare_pg_upgrade/9.5-2.sql"
|
||||
#include "udfs/citus_finish_pg_upgrade/10.0-4.sql"
|
||||
|
||||
|
||||
RESET search_path;
|
|
@ -0,0 +1,3 @@
|
|||
-- 9.5-1--9.5-2 was added later as a patch to fix a bug in our PG upgrade functions
|
||||
#include "udfs/citus_prepare_pg_upgrade/9.5-2.sql"
|
||||
#include "udfs/citus_finish_pg_upgrade/9.5-2.sql"
|
|
@ -0,0 +1,9 @@
|
|||
--
|
||||
-- 9.5-1--9.5-2 was added later as a patch to fix a bug in our PG upgrade functions
|
||||
--
|
||||
-- This script brings users who installed the patch released back to the 9.5-1
|
||||
-- upgrade path. We do this via a semantical downgrade since there has already been
|
||||
-- introduced new changes in the schema from 9.5-1 to 10.0-1. To make sure we include all
|
||||
-- changes made during that version change we decide to use the existing upgrade path from
|
||||
-- our later introduced 9.5-1 version.
|
||||
--
|
|
@ -0,0 +1,7 @@
|
|||
-- 9.5-2--9.5-3 was added later as a patch to improve master_update_table_statistics
|
||||
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
|
||||
RETURNS VOID
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
|
||||
COMMENT ON FUNCTION pg_catalog.master_update_table_statistics(regclass)
|
||||
IS 'updates shard statistics of the given table';
|
|
@ -0,0 +1,22 @@
|
|||
-- citus--9.5-3--9.5-2
|
||||
-- This is a downgrade path that will revert the changes made in citus--9.5-2--9.5-3.sql
|
||||
-- 9.5-2--9.5-3 was added later as a patch to improve master_update_table_statistics.
|
||||
-- We have this downgrade script so that we can continue from the main upgrade path
|
||||
-- when upgrading to later versions.
|
||||
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
|
||||
RETURNS VOID AS $$
|
||||
DECLARE
|
||||
colocated_tables regclass[];
|
||||
BEGIN
|
||||
SELECT get_colocated_table_array(relation) INTO colocated_tables;
|
||||
|
||||
PERFORM
|
||||
master_update_shard_statistics(shardid)
|
||||
FROM
|
||||
pg_dist_shard
|
||||
WHERE
|
||||
logicalrelid = ANY (colocated_tables);
|
||||
END;
|
||||
$$ LANGUAGE 'plpgsql';
|
||||
COMMENT ON FUNCTION master_update_table_statistics(regclass)
|
||||
IS 'updates shard statistics of the given table and its colocated tables';
|
|
@ -1,4 +1,51 @@
|
|||
-- citus--10.0-1--9.5-1
|
||||
-- citus--10.0-4--9.5-1
|
||||
|
||||
-- This migration file aims to fix the issues with upgrades on clusters without public schema.
|
||||
|
||||
-- This file is created by the following command, and some more changes in a separate commit
|
||||
-- cat citus--10.0-3--10.0-2.sql citus--10.0-2--10.0-1.sql citus--10.0-1--9.5-1.sql > citus--10.0-4--9.5-1.sql
|
||||
|
||||
-- copy of citus--10.0-4--10.0-3
|
||||
--
|
||||
-- 10.0-3--10.0-4 was added later as a patch to fix a bug in our PG upgrade functions
|
||||
--
|
||||
-- The upgrade fixes a bug in citus_(prepare|finish)_pg_upgrade. Given the old versions of
|
||||
-- these functions contain a bug it is better to _not_ restore the old version and keep
|
||||
-- the patched version of the function.
|
||||
--
|
||||
-- This is inline with the downgrade scripts for earlier versions of this patch
|
||||
--
|
||||
|
||||
-- copy of citus--10.0-3--10.0-2
|
||||
-- this is a downgrade path that will revert the changes made in citus--10.0-2--10.0-3.sql
|
||||
|
||||
DROP FUNCTION pg_catalog.citus_update_table_statistics(regclass);
|
||||
|
||||
#include "../udfs/citus_update_table_statistics/10.0-1.sql"
|
||||
|
||||
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
|
||||
RETURNS VOID AS $$
|
||||
DECLARE
|
||||
colocated_tables regclass[];
|
||||
BEGIN
|
||||
SELECT get_colocated_table_array(relation) INTO colocated_tables;
|
||||
|
||||
PERFORM
|
||||
master_update_shard_statistics(shardid)
|
||||
FROM
|
||||
pg_dist_shard
|
||||
WHERE
|
||||
logicalrelid = ANY (colocated_tables);
|
||||
END;
|
||||
$$ LANGUAGE 'plpgsql';
|
||||
COMMENT ON FUNCTION master_update_table_statistics(regclass)
|
||||
IS 'updates shard statistics of the given table and its colocated tables';
|
||||
|
||||
DROP FUNCTION pg_catalog.citus_get_active_worker_nodes(OUT text, OUT bigint);
|
||||
/* copy of citus--10.0-2--10.0-1.sql */
|
||||
#include "../../../columnar/sql/downgrades/columnar--10.0-2--10.0-1.sql"
|
||||
|
||||
-- copy of citus--10.0-1--9.5-1
|
||||
|
||||
-- In Citus 10.0, we added another internal udf (notify_constraint_dropped)
|
||||
-- to be called by citus_drop_trigger. Since this script is executed when
|
||||
|
@ -18,7 +65,8 @@ DROP FUNCTION pg_catalog.notify_constraint_dropped();
|
|||
|
||||
#include "../../../columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql"
|
||||
|
||||
DROP VIEW public.citus_tables;
|
||||
DROP VIEW IF EXISTS pg_catalog.citus_tables;
|
||||
DROP VIEW IF EXISTS public.citus_tables;
|
||||
DROP FUNCTION pg_catalog.alter_distributed_table(regclass, text, int, text, boolean);
|
||||
DROP FUNCTION pg_catalog.alter_table_set_access_method(regclass, text);
|
||||
DROP FUNCTION pg_catalog.citus_total_relation_size(regclass,boolean);
|
|
@ -0,0 +1,108 @@
|
|||
CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade()
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
SET search_path = pg_catalog
|
||||
AS $cppu$
|
||||
DECLARE
|
||||
table_name regclass;
|
||||
command text;
|
||||
trigger_name text;
|
||||
BEGIN
|
||||
--
|
||||
-- restore citus catalog tables
|
||||
--
|
||||
INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition;
|
||||
INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard;
|
||||
INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement;
|
||||
INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata;
|
||||
INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node;
|
||||
INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group;
|
||||
INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction;
|
||||
INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation;
|
||||
-- enterprise catalog tables
|
||||
INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
|
||||
INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;
|
||||
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
|
||||
INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
|
||||
name,
|
||||
default_strategy,
|
||||
shard_cost_function::regprocedure::regproc,
|
||||
node_capacity_function::regprocedure::regproc,
|
||||
shard_allowed_on_node_function::regprocedure::regproc,
|
||||
default_threshold,
|
||||
minimum_threshold
|
||||
FROM public.pg_dist_rebalance_strategy;
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
|
||||
|
||||
--
|
||||
-- drop backup tables
|
||||
--
|
||||
DROP TABLE public.pg_dist_authinfo;
|
||||
DROP TABLE public.pg_dist_colocation;
|
||||
DROP TABLE public.pg_dist_local_group;
|
||||
DROP TABLE public.pg_dist_node;
|
||||
DROP TABLE public.pg_dist_node_metadata;
|
||||
DROP TABLE public.pg_dist_partition;
|
||||
DROP TABLE public.pg_dist_placement;
|
||||
DROP TABLE public.pg_dist_poolinfo;
|
||||
DROP TABLE public.pg_dist_shard;
|
||||
DROP TABLE public.pg_dist_transaction;
|
||||
DROP TABLE public.pg_dist_rebalance_strategy;
|
||||
|
||||
--
|
||||
-- reset sequences
|
||||
--
|
||||
PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false);
|
||||
PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false);
|
||||
PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false);
|
||||
PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false);
|
||||
PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false);
|
||||
|
||||
--
|
||||
-- register triggers
|
||||
--
|
||||
FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition
|
||||
LOOP
|
||||
trigger_name := 'truncate_trigger_' || table_name::oid;
|
||||
command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
|
||||
EXECUTE command;
|
||||
command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name);
|
||||
EXECUTE command;
|
||||
END LOOP;
|
||||
|
||||
--
|
||||
-- set dependencies
|
||||
--
|
||||
INSERT INTO pg_depend
|
||||
SELECT
|
||||
'pg_class'::regclass::oid as classid,
|
||||
p.logicalrelid::regclass::oid as objid,
|
||||
0 as objsubid,
|
||||
'pg_extension'::regclass::oid as refclassid,
|
||||
(select oid from pg_extension where extname = 'citus') as refobjid,
|
||||
0 as refobjsubid ,
|
||||
'n' as deptype
|
||||
FROM pg_catalog.pg_dist_partition p;
|
||||
|
||||
-- restore pg_dist_object from the stable identifiers
|
||||
TRUNCATE citus.pg_dist_object;
|
||||
INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
|
||||
SELECT
|
||||
address.classid,
|
||||
address.objid,
|
||||
address.objsubid,
|
||||
naming.distribution_argument_index,
|
||||
naming.colocationid
|
||||
FROM
|
||||
public.pg_dist_object naming,
|
||||
pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;
|
||||
|
||||
DROP TABLE public.pg_dist_object;
|
||||
|
||||
PERFORM citus_internal.columnar_ensure_objects_exist();
|
||||
END;
|
||||
$cppu$;
|
||||
|
||||
COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade()
|
||||
IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade';
|
|
@ -0,0 +1,105 @@
|
|||
CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade()
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
SET search_path = pg_catalog
|
||||
AS $cppu$
|
||||
DECLARE
|
||||
table_name regclass;
|
||||
command text;
|
||||
trigger_name text;
|
||||
BEGIN
|
||||
--
|
||||
-- restore citus catalog tables
|
||||
--
|
||||
INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition;
|
||||
INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard;
|
||||
INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement;
|
||||
INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata;
|
||||
INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node;
|
||||
INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group;
|
||||
INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction;
|
||||
INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation;
|
||||
-- enterprise catalog tables
|
||||
INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
|
||||
INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;
|
||||
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
|
||||
INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
|
||||
name,
|
||||
default_strategy,
|
||||
shard_cost_function::regprocedure::regproc,
|
||||
node_capacity_function::regprocedure::regproc,
|
||||
shard_allowed_on_node_function::regprocedure::regproc,
|
||||
default_threshold,
|
||||
minimum_threshold
|
||||
FROM public.pg_dist_rebalance_strategy;
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
|
||||
|
||||
--
|
||||
-- drop backup tables
|
||||
--
|
||||
DROP TABLE public.pg_dist_authinfo;
|
||||
DROP TABLE public.pg_dist_colocation;
|
||||
DROP TABLE public.pg_dist_local_group;
|
||||
DROP TABLE public.pg_dist_node;
|
||||
DROP TABLE public.pg_dist_node_metadata;
|
||||
DROP TABLE public.pg_dist_partition;
|
||||
DROP TABLE public.pg_dist_placement;
|
||||
DROP TABLE public.pg_dist_poolinfo;
|
||||
DROP TABLE public.pg_dist_shard;
|
||||
DROP TABLE public.pg_dist_transaction;
|
||||
|
||||
--
|
||||
-- reset sequences
|
||||
--
|
||||
PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false);
|
||||
PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false);
|
||||
PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false);
|
||||
PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false);
|
||||
PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false);
|
||||
|
||||
--
|
||||
-- register triggers
|
||||
--
|
||||
FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition
|
||||
LOOP
|
||||
trigger_name := 'truncate_trigger_' || table_name::oid;
|
||||
command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
|
||||
EXECUTE command;
|
||||
command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name);
|
||||
EXECUTE command;
|
||||
END LOOP;
|
||||
|
||||
--
|
||||
-- set dependencies
|
||||
--
|
||||
INSERT INTO pg_depend
|
||||
SELECT
|
||||
'pg_class'::regclass::oid as classid,
|
||||
p.logicalrelid::regclass::oid as objid,
|
||||
0 as objsubid,
|
||||
'pg_extension'::regclass::oid as refclassid,
|
||||
(select oid from pg_extension where extname = 'citus') as refobjid,
|
||||
0 as refobjsubid ,
|
||||
'n' as deptype
|
||||
FROM pg_catalog.pg_dist_partition p;
|
||||
|
||||
-- restore pg_dist_object from the stable identifiers
|
||||
TRUNCATE citus.pg_dist_object;
|
||||
INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
|
||||
SELECT
|
||||
address.classid,
|
||||
address.objid,
|
||||
address.objsubid,
|
||||
naming.distribution_argument_index,
|
||||
naming.colocationid
|
||||
FROM
|
||||
public.pg_dist_object naming,
|
||||
pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;
|
||||
|
||||
DROP TABLE public.pg_dist_object;
|
||||
END;
|
||||
$cppu$;
|
||||
|
||||
COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade()
|
||||
IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade';
|
|
@ -0,0 +1,106 @@
|
|||
CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade()
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
SET search_path = pg_catalog
|
||||
AS $cppu$
|
||||
DECLARE
|
||||
table_name regclass;
|
||||
command text;
|
||||
trigger_name text;
|
||||
BEGIN
|
||||
--
|
||||
-- restore citus catalog tables
|
||||
--
|
||||
INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition;
|
||||
INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard;
|
||||
INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement;
|
||||
INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata;
|
||||
INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node;
|
||||
INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group;
|
||||
INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction;
|
||||
INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation;
|
||||
    -- enterprise catalog tables
    INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
    INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;

    ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
    INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
        name,
        default_strategy,
        shard_cost_function::regprocedure::regproc,
        node_capacity_function::regprocedure::regproc,
        shard_allowed_on_node_function::regprocedure::regproc,
        default_threshold,
        minimum_threshold
    FROM public.pg_dist_rebalance_strategy;
    ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;

    --
    -- drop backup tables
    --
    DROP TABLE public.pg_dist_authinfo;
    DROP TABLE public.pg_dist_colocation;
    DROP TABLE public.pg_dist_local_group;
    DROP TABLE public.pg_dist_node;
    DROP TABLE public.pg_dist_node_metadata;
    DROP TABLE public.pg_dist_partition;
    DROP TABLE public.pg_dist_placement;
    DROP TABLE public.pg_dist_poolinfo;
    DROP TABLE public.pg_dist_shard;
    DROP TABLE public.pg_dist_transaction;
    DROP TABLE public.pg_dist_rebalance_strategy;

    --
    -- reset sequences
    --
    PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false);
    PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false);
    PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false);
    PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false);
    PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false);

    --
    -- register triggers
    --
    FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition
    LOOP
        trigger_name := 'truncate_trigger_' || table_name::oid;
        command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
        EXECUTE command;
        command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name);
        EXECUTE command;
    END LOOP;

    --
    -- set dependencies
    --
    INSERT INTO pg_depend
    SELECT
        'pg_class'::regclass::oid as classid,
        p.logicalrelid::regclass::oid as objid,
        0 as objsubid,
        'pg_extension'::regclass::oid as refclassid,
        (select oid from pg_extension where extname = 'citus') as refobjid,
        0 as refobjsubid ,
        'n' as deptype
    FROM pg_catalog.pg_dist_partition p;

    -- restore pg_dist_object from the stable identifiers
    TRUNCATE citus.pg_dist_object;
    INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
    SELECT
        address.classid,
        address.objid,
        address.objsubid,
        naming.distribution_argument_index,
        naming.colocationid
    FROM
        public.pg_dist_object naming,
        pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;

    DROP TABLE public.pg_dist_object;
END;
$cppu$;

COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade()
    IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade';
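
These two UDFs are meant to bracket a pg_upgrade run. A minimal usage sketch, assuming the usual Citus upgrade flow (the pg_upgrade invocation itself is outside this diff and is typically repeated on the coordinator and on every worker):

-- on each node, before stopping the old server
SELECT citus_prepare_pg_upgrade();
-- ... run pg_upgrade against the node ...
-- on the new server, once pg_upgrade has completed
SELECT citus_finish_pg_upgrade();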
|
|
@@ -86,17 +86,7 @@ BEGIN
    FROM pg_catalog.pg_dist_partition p;

    -- restore pg_dist_object from the stable identifiers
    -- DELETE/INSERT to avoid primary key violations
    WITH old_records AS (
        DELETE FROM
            citus.pg_dist_object
        RETURNING
            type,
            object_names,
            object_args,
            distribution_argument_index,
            colocationid
    )
    TRUNCATE citus.pg_dist_object;
    INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
    SELECT
        address.classid,
@@ -105,8 +95,10 @@ BEGIN
        naming.distribution_argument_index,
        naming.colocationid
    FROM
        old_records naming,
        pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;
        public.pg_dist_object naming,
        pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;

    DROP TABLE public.pg_dist_object;

    PERFORM citus_internal.columnar_ensure_objects_exist();
END;
|
|
@ -0,0 +1,44 @@
|
|||
CREATE OR REPLACE FUNCTION pg_catalog.citus_prepare_pg_upgrade()
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
SET search_path = pg_catalog
|
||||
AS $cppu$
|
||||
BEGIN
|
||||
--
|
||||
-- backup citus catalog tables
|
||||
--
|
||||
CREATE TABLE public.pg_dist_partition AS SELECT * FROM pg_catalog.pg_dist_partition;
|
||||
CREATE TABLE public.pg_dist_shard AS SELECT * FROM pg_catalog.pg_dist_shard;
|
||||
CREATE TABLE public.pg_dist_placement AS SELECT * FROM pg_catalog.pg_dist_placement;
|
||||
CREATE TABLE public.pg_dist_node_metadata AS SELECT * FROM pg_catalog.pg_dist_node_metadata;
|
||||
CREATE TABLE public.pg_dist_node AS SELECT * FROM pg_catalog.pg_dist_node;
|
||||
CREATE TABLE public.pg_dist_local_group AS SELECT * FROM pg_catalog.pg_dist_local_group;
|
||||
CREATE TABLE public.pg_dist_transaction AS SELECT * FROM pg_catalog.pg_dist_transaction;
|
||||
CREATE TABLE public.pg_dist_colocation AS SELECT * FROM pg_catalog.pg_dist_colocation;
|
||||
-- enterprise catalog tables
|
||||
CREATE TABLE public.pg_dist_authinfo AS SELECT * FROM pg_catalog.pg_dist_authinfo;
|
||||
CREATE TABLE public.pg_dist_poolinfo AS SELECT * FROM pg_catalog.pg_dist_poolinfo;
|
||||
CREATE TABLE public.pg_dist_rebalance_strategy AS SELECT
|
||||
name,
|
||||
default_strategy,
|
||||
shard_cost_function::regprocedure::text,
|
||||
node_capacity_function::regprocedure::text,
|
||||
shard_allowed_on_node_function::regprocedure::text,
|
||||
default_threshold,
|
||||
minimum_threshold
|
||||
FROM pg_catalog.pg_dist_rebalance_strategy;
|
||||
|
||||
-- store upgrade stable identifiers on pg_dist_object catalog
|
||||
CREATE TABLE public.pg_dist_object AS SELECT
|
||||
address.type,
|
||||
address.object_names,
|
||||
address.object_args,
|
||||
objects.distribution_argument_index,
|
||||
objects.colocationid
|
||||
FROM citus.pg_dist_object objects,
|
||||
pg_catalog.pg_identify_object_as_address(objects.classid, objects.objid, objects.objsubid) address;
|
||||
END;
|
||||
$cppu$;
|
||||
|
||||
COMMENT ON FUNCTION pg_catalog.citus_prepare_pg_upgrade()
|
||||
IS 'perform tasks to copy citus settings to a location that could later be restored after pg_upgrade is done';
|
|
@ -0,0 +1,60 @@
|
|||
CREATE OR REPLACE FUNCTION pg_catalog.citus_prepare_pg_upgrade()
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
SET search_path = pg_catalog
|
||||
AS $cppu$
|
||||
BEGIN
|
||||
--
|
||||
-- Drop existing backup tables
|
||||
--
|
||||
DROP TABLE IF EXISTS public.pg_dist_partition;
|
||||
DROP TABLE IF EXISTS public.pg_dist_shard;
|
||||
DROP TABLE IF EXISTS public.pg_dist_placement;
|
||||
DROP TABLE IF EXISTS public.pg_dist_node_metadata;
|
||||
DROP TABLE IF EXISTS public.pg_dist_node;
|
||||
DROP TABLE IF EXISTS public.pg_dist_local_group;
|
||||
DROP TABLE IF EXISTS public.pg_dist_transaction;
|
||||
DROP TABLE IF EXISTS public.pg_dist_colocation;
|
||||
DROP TABLE IF EXISTS public.pg_dist_authinfo;
|
||||
DROP TABLE IF EXISTS public.pg_dist_poolinfo;
|
||||
DROP TABLE IF EXISTS public.pg_dist_rebalance_strategy;
|
||||
DROP TABLE IF EXISTS public.pg_dist_object;
|
||||
|
||||
--
|
||||
-- backup citus catalog tables
|
||||
--
|
||||
CREATE TABLE public.pg_dist_partition AS SELECT * FROM pg_catalog.pg_dist_partition;
|
||||
CREATE TABLE public.pg_dist_shard AS SELECT * FROM pg_catalog.pg_dist_shard;
|
||||
CREATE TABLE public.pg_dist_placement AS SELECT * FROM pg_catalog.pg_dist_placement;
|
||||
CREATE TABLE public.pg_dist_node_metadata AS SELECT * FROM pg_catalog.pg_dist_node_metadata;
|
||||
CREATE TABLE public.pg_dist_node AS SELECT * FROM pg_catalog.pg_dist_node;
|
||||
CREATE TABLE public.pg_dist_local_group AS SELECT * FROM pg_catalog.pg_dist_local_group;
|
||||
CREATE TABLE public.pg_dist_transaction AS SELECT * FROM pg_catalog.pg_dist_transaction;
|
||||
CREATE TABLE public.pg_dist_colocation AS SELECT * FROM pg_catalog.pg_dist_colocation;
|
||||
-- enterprise catalog tables
|
||||
CREATE TABLE public.pg_dist_authinfo AS SELECT * FROM pg_catalog.pg_dist_authinfo;
|
||||
CREATE TABLE public.pg_dist_poolinfo AS SELECT * FROM pg_catalog.pg_dist_poolinfo;
|
||||
CREATE TABLE public.pg_dist_rebalance_strategy AS SELECT
|
||||
name,
|
||||
default_strategy,
|
||||
shard_cost_function::regprocedure::text,
|
||||
node_capacity_function::regprocedure::text,
|
||||
shard_allowed_on_node_function::regprocedure::text,
|
||||
default_threshold,
|
||||
minimum_threshold
|
||||
FROM pg_catalog.pg_dist_rebalance_strategy;
|
||||
|
||||
-- store upgrade stable identifiers on pg_dist_object catalog
|
||||
CREATE TABLE public.pg_dist_object AS SELECT
|
||||
address.type,
|
||||
address.object_names,
|
||||
address.object_args,
|
||||
objects.distribution_argument_index,
|
||||
objects.colocationid
|
||||
FROM citus.pg_dist_object objects,
|
||||
pg_catalog.pg_identify_object_as_address(objects.classid, objects.objid, objects.objsubid) address;
|
||||
END;
|
||||
$cppu$;
|
||||
|
||||
COMMENT ON FUNCTION pg_catalog.citus_prepare_pg_upgrade()
|
||||
IS 'perform tasks to copy citus settings to a location that could later be restored after pg_upgrade is done';
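
The DROP TABLE IF EXISTS prelude in this version makes the preparation step re-runnable; a hedged illustration of the difference:

SELECT citus_prepare_pg_upgrade();
-- a second call no longer errors out on the pre-existing backup tables in the public schema
SELECT citus_prepare_pg_upgrade();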
|
|
@ -18,6 +18,7 @@ BEGIN
|
|||
DROP TABLE IF EXISTS public.pg_dist_authinfo;
|
||||
DROP TABLE IF EXISTS public.pg_dist_poolinfo;
|
||||
DROP TABLE IF EXISTS public.pg_dist_rebalance_strategy;
|
||||
DROP TABLE IF EXISTS public.pg_dist_object;
|
||||
|
||||
--
|
||||
-- backup citus catalog tables
|
||||
|
@ -44,8 +45,14 @@ BEGIN
|
|||
FROM pg_catalog.pg_dist_rebalance_strategy;
|
||||
|
||||
-- store upgrade stable identifiers on pg_dist_object catalog
|
||||
UPDATE citus.pg_dist_object
|
||||
SET (type, object_names, object_args) = (SELECT * FROM pg_identify_object_as_address(classid, objid, objsubid));
|
||||
CREATE TABLE public.pg_dist_object AS SELECT
|
||||
address.type,
|
||||
address.object_names,
|
||||
address.object_args,
|
||||
objects.distribution_argument_index,
|
||||
objects.colocationid
|
||||
FROM citus.pg_dist_object objects,
|
||||
pg_catalog.pg_identify_object_as_address(objects.classid, objects.objid, objects.objsubid) address;
|
||||
END;
|
||||
$cppu$;
|
||||
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
DO $$
|
||||
declare
|
||||
citus_tables_create_query text;
|
||||
BEGIN
|
||||
citus_tables_create_query=$CTCQ$
|
||||
CREATE OR REPLACE VIEW %I.citus_tables AS
|
||||
SELECT
|
||||
logicalrelid AS table_name,
|
||||
CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE 'reference' END AS citus_table_type,
|
||||
coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column,
|
||||
colocationid AS colocation_id,
|
||||
pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size,
|
||||
(select count(*) from pg_dist_shard where logicalrelid = p.logicalrelid) AS shard_count,
|
||||
pg_get_userbyid(relowner) AS table_owner,
|
||||
amname AS access_method
|
||||
FROM
|
||||
pg_dist_partition p
|
||||
JOIN
|
||||
pg_class c ON (p.logicalrelid = c.oid)
|
||||
LEFT JOIN
|
||||
pg_am a ON (a.oid = c.relam)
|
||||
WHERE
|
||||
partkey IS NOT NULL OR repmodel = 't'
|
||||
ORDER BY
|
||||
logicalrelid::text;
|
||||
$CTCQ$;
|
||||
|
||||
IF EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = 'public') THEN
|
||||
EXECUTE format(citus_tables_create_query, 'public');
|
||||
GRANT SELECT ON public.citus_tables TO public;
|
||||
ELSE
|
||||
EXECUTE format(citus_tables_create_query, 'citus');
|
||||
ALTER VIEW citus.citus_tables SET SCHEMA pg_catalog;
|
||||
GRANT SELECT ON pg_catalog.citus_tables TO public;
|
||||
END IF;
|
||||
|
||||
END;
|
||||
$$;
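
For reference, the view created by the DO block above can be queried like any other relation; a hedged example (output depends on the cluster):

SELECT table_name, citus_table_type, distribution_column, colocation_id, shard_count
FROM citus_tables;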
|
|
@ -1,20 +1,38 @@
|
|||
CREATE VIEW public.citus_tables AS
|
||||
SELECT
|
||||
logicalrelid AS table_name,
|
||||
CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE 'reference' END AS citus_table_type,
|
||||
coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column,
|
||||
colocationid AS colocation_id,
|
||||
pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size,
|
||||
(select count(*) from pg_dist_shard where logicalrelid = p.logicalrelid) AS shard_count,
|
||||
pg_get_userbyid(relowner) AS table_owner,
|
||||
amname AS access_method
|
||||
FROM
|
||||
pg_dist_partition p
|
||||
JOIN
|
||||
pg_class c ON (p.logicalrelid = c.oid)
|
||||
LEFT JOIN
|
||||
pg_am a ON (a.oid = c.relam)
|
||||
WHERE
|
||||
partkey IS NOT NULL OR repmodel = 't'
|
||||
ORDER BY
|
||||
logicalrelid::text;
|
||||
DO $$
|
||||
declare
|
||||
citus_tables_create_query text;
|
||||
BEGIN
|
||||
citus_tables_create_query=$CTCQ$
|
||||
CREATE OR REPLACE VIEW %I.citus_tables AS
|
||||
SELECT
|
||||
logicalrelid AS table_name,
|
||||
CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE 'reference' END AS citus_table_type,
|
||||
coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column,
|
||||
colocationid AS colocation_id,
|
||||
pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size,
|
||||
(select count(*) from pg_dist_shard where logicalrelid = p.logicalrelid) AS shard_count,
|
||||
pg_get_userbyid(relowner) AS table_owner,
|
||||
amname AS access_method
|
||||
FROM
|
||||
pg_dist_partition p
|
||||
JOIN
|
||||
pg_class c ON (p.logicalrelid = c.oid)
|
||||
LEFT JOIN
|
||||
pg_am a ON (a.oid = c.relam)
|
||||
WHERE
|
||||
partkey IS NOT NULL OR repmodel = 't'
|
||||
ORDER BY
|
||||
logicalrelid::text;
|
||||
$CTCQ$;
|
||||
|
||||
IF EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = 'public') THEN
|
||||
EXECUTE format(citus_tables_create_query, 'public');
|
||||
GRANT SELECT ON public.citus_tables TO public;
|
||||
ELSE
|
||||
EXECUTE format(citus_tables_create_query, 'citus');
|
||||
ALTER VIEW citus.citus_tables SET SCHEMA pg_catalog;
|
||||
GRANT SELECT ON pg_catalog.citus_tables TO public;
|
||||
END IF;
|
||||
|
||||
END;
|
||||
$$;
|
||||
|
|
|
@@ -0,0 +1,6 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_update_table_statistics(relation regclass)
    RETURNS VOID
    LANGUAGE C STRICT
    AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
COMMENT ON FUNCTION pg_catalog.citus_update_table_statistics(regclass)
    IS 'updates shard statistics of the given table';
|
|
@@ -1,17 +1,6 @@
CREATE FUNCTION pg_catalog.citus_update_table_statistics(relation regclass)
RETURNS VOID AS $$
DECLARE
    colocated_tables regclass[];
BEGIN
    SELECT get_colocated_table_array(relation) INTO colocated_tables;

    PERFORM
        master_update_shard_statistics(shardid)
    FROM
        pg_dist_shard
    WHERE
        logicalrelid = ANY (colocated_tables);
END;
$$ LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION pg_catalog.citus_update_table_statistics(relation regclass)
    RETURNS VOID
    LANGUAGE C STRICT
    AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
COMMENT ON FUNCTION pg_catalog.citus_update_table_statistics(regclass)
    IS 'updates shard statistics of the given table and its colocated tables';
    IS 'updates shard statistics of the given table';
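
The call signature is unchanged by the rewrite from PL/pgSQL to C; a usage sketch (the table name is illustrative):

SELECT citus_update_table_statistics('github_events');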
|
||||
|
|
|
@@ -5,12 +5,13 @@ FROM (
        FROM pg_class c
        JOIN pg_inherits i ON (c.oid = inhrelid)
        JOIN pg_partitioned_table p ON (inhparent = partrelid)
        JOIN pg_attribute a ON (partrelid = attrelid AND ARRAY[attnum] <@ string_to_array(partattrs::text, ' ')::int2[])
        JOIN pg_attribute a ON (partrelid = attrelid)
        JOIN pg_type t ON (atttypid = t.oid)
        JOIN pg_namespace tn ON (t.typnamespace = tn.oid)
        LEFT JOIN pg_am am ON (c.relam = am.oid),
        pg_catalog.time_partition_range(c.oid)
        WHERE c.relpartbound IS NOT NULL AND p.partstrat = 'r' AND p.partnatts = 1
        AND a.attnum = ANY(partattrs::int2[])
    ) partitions
ORDER BY partrelid::text, lower_bound;
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include "catalog/pg_type.h"
|
||||
#include "distributed/connection_management.h"
|
||||
#include "distributed/intermediate_result_pruning.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/maintenanced.h"
|
||||
#include "distributed/metadata_sync.h"
|
||||
|
@ -104,7 +105,7 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS)
|
|||
}
|
||||
|
||||
MultiConnection *connection = GetNodeConnection(FORCE_NEW_CONNECTION,
|
||||
"localhost", PostPortNumber);
|
||||
LOCAL_HOST_NAME, PostPortNumber);
|
||||
ExecuteCriticalRemoteCommand(connection, "LISTEN " METADATA_SYNC_CHANNEL);
|
||||
|
||||
int waitFlags = WL_SOCKET_READABLE | WL_TIMEOUT | WL_POSTMASTER_DEATH;
|
||||
|
|
|
@ -36,12 +36,13 @@ create_progress(PG_FUNCTION_ARGS)
|
|||
{
|
||||
uint64 magicNumber = PG_GETARG_INT64(0);
|
||||
int stepCount = PG_GETARG_INT32(1);
|
||||
ProgressMonitorData *monitor = CreateProgressMonitor(magicNumber, stepCount,
|
||||
sizeof(uint64), 0);
|
||||
dsm_handle dsmHandle;
|
||||
ProgressMonitorData *monitor = CreateProgressMonitor(stepCount,
|
||||
sizeof(uint64), &dsmHandle);
|
||||
|
||||
if (monitor != NULL)
|
||||
{
|
||||
uint64 *steps = (uint64 *) monitor->steps;
|
||||
uint64 *steps = (uint64 *) ProgressMonitorSteps(monitor);
|
||||
|
||||
int i = 0;
|
||||
for (; i < stepCount; i++)
|
||||
|
@ -50,6 +51,7 @@ create_progress(PG_FUNCTION_ARGS)
|
|||
}
|
||||
}
|
||||
|
||||
RegisterProgressMonitor(magicNumber, 0, dsmHandle);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
@ -64,7 +66,7 @@ update_progress(PG_FUNCTION_ARGS)
|
|||
|
||||
if (monitor != NULL && step < monitor->stepCount)
|
||||
{
|
||||
uint64 *steps = (uint64 *) monitor->steps;
|
||||
uint64 *steps = (uint64 *) ProgressMonitorSteps(monitor);
|
||||
steps[step] = newValue;
|
||||
}
|
||||
|
||||
|
@ -93,7 +95,7 @@ show_progress(PG_FUNCTION_ARGS)
|
|||
ProgressMonitorData *monitor = NULL;
|
||||
foreach_ptr(monitor, monitorList)
|
||||
{
|
||||
uint64 *steps = monitor->steps;
|
||||
uint64 *steps = ProgressMonitorSteps(monitor);
|
||||
|
||||
for (int stepIndex = 0; stepIndex < monitor->stepCount; stepIndex++)
|
||||
{
|
||||
|
|
|
@ -17,10 +17,10 @@
|
|||
|
||||
#include "access/xact.h"
|
||||
#include "distributed/connection_management.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/function_utils.h"
|
||||
#include "distributed/intermediate_result_pruning.h"
|
||||
#include "distributed/lock_graph.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/remote_commands.h"
|
||||
#include "distributed/run_from_same_connection.h"
|
||||
|
|
|
@ -18,9 +18,14 @@
|
|||
#include "miscadmin.h"
|
||||
#include "pgstat.h"
|
||||
|
||||
#include "distributed/transaction_management.h"
|
||||
|
||||
|
||||
static Size MemoryContextTotalSpace(MemoryContext context);
|
||||
|
||||
PG_FUNCTION_INFO_V1(top_transaction_context_size);
|
||||
PG_FUNCTION_INFO_V1(coordinated_transaction_should_use_2PC);
|
||||
|
||||
|
||||
/*
|
||||
* top_transaction_context_size returns current size of TopTransactionContext.
|
||||
|
@ -54,3 +59,20 @@ MemoryContextTotalSpace(MemoryContext context)
|
|||
|
||||
return totalSpace;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* coordinated_transaction_should_use_2PC returns true if the transaction is in a
|
||||
 * coordinated transaction and uses 2PC. If the transaction is not in a
|
||||
* coordinated transaction, the function throws an error.
|
||||
*/
|
||||
Datum
|
||||
coordinated_transaction_should_use_2PC(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (!InCoordinatedTransaction())
|
||||
{
|
||||
ereport(ERROR, (errmsg("The transaction is not a coordinated transaction")));
|
||||
}
|
||||
|
||||
PG_RETURN_BOOL(GetCoordinatedTransactionShouldUse2PC());
|
||||
}
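
Assuming the test suite exposes this C function as a SQL-callable UDF (the distributed table below is hypothetical), a sketch of how it would be exercised:

BEGIN;
INSERT INTO dist_table VALUES (1, 'a');   -- opens a coordinated transaction that performs a write
SELECT coordinated_transaction_should_use_2pc();
ROLLBACK;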
|
||||
|
|
|
@ -793,7 +793,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access
|
|||
"foreign keys. Any parallel modification to "
|
||||
"those hash distributed tables in the same "
|
||||
"transaction can only be executed in sequential query "
|
||||
"execution mode", relationName)));
|
||||
"execution mode",
|
||||
relationName != NULL ? relationName : "<dropped>")));
|
||||
|
||||
/*
|
||||
* Switching to sequential mode is admittedly confusing and, could be useless
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "distributed/connection_management.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/placement_connection.h"
|
||||
#include "distributed/remote_commands.h"
|
||||
#include "distributed/remote_transaction.h"
|
||||
#include "distributed/transaction_identifier.h"
|
||||
|
@ -782,8 +783,16 @@ CoordinatedRemoteTransactionsPrepare(void)
|
|||
continue;
|
||||
}
|
||||
|
||||
StartRemoteTransactionPrepare(connection);
|
||||
connectionList = lappend(connectionList, connection);
|
||||
/*
|
||||
* Check if any DML or DDL is executed over the connection on any
|
||||
* placement/table. If yes, we start preparing the transaction, otherwise
|
||||
* we skip prepare since the connection didn't perform any write (read-only)
|
||||
*/
|
||||
if (ConnectionModifiedPlacement(connection))
|
||||
{
|
||||
StartRemoteTransactionPrepare(connection);
|
||||
connectionList = lappend(connectionList, connection);
|
||||
}
|
||||
}
|
||||
|
||||
bool raiseInterrupts = true;
|
||||
|
@ -798,6 +807,10 @@ CoordinatedRemoteTransactionsPrepare(void)
|
|||
|
||||
if (transaction->transactionState != REMOTE_TRANS_PREPARING)
|
||||
{
|
||||
/*
|
||||
* Verify that the connection didn't modify any placement
|
||||
*/
|
||||
Assert(!ConnectionModifiedPlacement(connection));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -96,9 +96,16 @@ MemoryContext CommitContext = NULL;
|
|||
/*
|
||||
* Should this coordinated transaction use 2PC? Set by
|
||||
* CoordinatedTransactionUse2PC(), e.g. if DDL was issued and
|
||||
* MultiShardCommitProtocol was set to 2PC.
|
||||
* MultiShardCommitProtocol was set to 2PC. But, even if this
|
||||
* flag is set, the transaction manager is smart enough to only
|
||||
* do 2PC on the remote connections that did a modification.
|
||||
*
|
||||
* As a variable name ShouldCoordinatedTransactionUse2PC could
|
||||
* be improved. We use CoordinatedTransactionShouldUse2PC() as the
|
||||
* public API function, hence couldn't come up with a better name
|
||||
* for the underlying variable at the moment.
|
||||
*/
|
||||
bool CoordinatedTransactionUses2PC = false;
|
||||
bool ShouldCoordinatedTransactionUse2PC = false;
|
||||
|
||||
/* if disabled, distributed statements in a function may run as separate transactions */
|
||||
bool FunctionOpensTransactionBlock = true;
|
||||
|
@ -183,15 +190,29 @@ InCoordinatedTransaction(void)
|
|||
|
||||
|
||||
/*
|
||||
* CoordinatedTransactionUse2PC() signals that the current coordinated
|
||||
* CoordinatedTransactionShouldUse2PC() signals that the current coordinated
|
||||
* transaction should use 2PC to commit.
|
||||
*
|
||||
* Note that even if 2PC is enabled, it is only used for connections that make
|
||||
* modification (DML or DDL).
|
||||
*/
|
||||
void
|
||||
CoordinatedTransactionUse2PC(void)
|
||||
CoordinatedTransactionShouldUse2PC(void)
|
||||
{
|
||||
Assert(InCoordinatedTransaction());
|
||||
|
||||
CoordinatedTransactionUses2PC = true;
|
||||
ShouldCoordinatedTransactionUse2PC = true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetCoordinatedTransactionShouldUse2PC is a wrapper function to read the value
|
||||
* of CoordinatedTransactionShouldUse2PCFlag.
|
||||
*/
|
||||
bool
|
||||
GetCoordinatedTransactionShouldUse2PC(void)
|
||||
{
|
||||
return ShouldCoordinatedTransactionUse2PC;
|
||||
}
|
||||
|
||||
|
||||
|
@ -297,28 +318,8 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
|
|||
/* stop propagating notices from workers, we know the query is failed */
|
||||
DisableWorkerMessagePropagation();
|
||||
|
||||
/*
|
||||
* FIXME: Add warning for the COORD_TRANS_COMMITTED case. That
|
||||
* can be reached if this backend fails after the
|
||||
* XACT_EVENT_PRE_COMMIT state.
|
||||
*/
|
||||
RemoveIntermediateResultsDirectory();
|
||||
|
||||
/*
|
||||
* Call other parts of citus that need to integrate into
|
||||
* transaction management. Do so before doing other work, so the
|
||||
* callbacks still can perform work if needed.
|
||||
*/
|
||||
{
|
||||
/*
|
||||
* On Windows it's not possible to delete a file before you've closed all
|
||||
* handles to it (rmdir will return success but not take effect). Since
|
||||
* we're in an ABORT handler it's very likely that not all handles have
|
||||
* been closed; force them closed here before running
|
||||
* RemoveIntermediateResultsDirectory.
|
||||
*/
|
||||
AtEOXact_Files(false);
|
||||
RemoveIntermediateResultsDirectory();
|
||||
}
|
||||
ResetShardPlacementTransactionState();
|
||||
|
||||
/* handles both already prepared and open transactions */
|
||||
|
@ -425,7 +426,7 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
|
|||
*/
|
||||
MarkFailedShardPlacements();
|
||||
|
||||
if (CoordinatedTransactionUses2PC)
|
||||
if (ShouldCoordinatedTransactionUse2PC)
|
||||
{
|
||||
CoordinatedRemoteTransactionsPrepare();
|
||||
CurrentCoordinatedTransactionState = COORD_TRANS_PREPARED;
|
||||
|
@ -453,7 +454,7 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
|
|||
* Check again whether shards/placement successfully
|
||||
* committed. This handles failure at COMMIT/PREPARE time.
|
||||
*/
|
||||
PostCommitMarkFailedShardPlacements(CoordinatedTransactionUses2PC);
|
||||
PostCommitMarkFailedShardPlacements(ShouldCoordinatedTransactionUse2PC);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -485,7 +486,7 @@ ResetGlobalVariables()
|
|||
FreeSavedExplainPlan();
|
||||
dlist_init(&InProgressTransactions);
|
||||
activeSetStmts = NULL;
|
||||
CoordinatedTransactionUses2PC = false;
|
||||
ShouldCoordinatedTransactionUse2PC = false;
|
||||
TransactionModifiedNodeMetadata = false;
|
||||
MetadataSyncOnCommit = false;
|
||||
ResetWorkerErrorIndication();
|
||||
|
|
|
@ -96,7 +96,7 @@ SendCommandToWorkerAsUser(const char *nodeName, int32 nodePort, const char *node
|
|||
uint32 connectionFlags = 0;
|
||||
|
||||
UseCoordinatedTransaction();
|
||||
CoordinatedTransactionUse2PC();
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
|
||||
MultiConnection *transactionConnection = GetNodeUserDatabaseConnection(
|
||||
connectionFlags, nodeName,
|
||||
|
@ -404,7 +404,7 @@ SendCommandToWorkersParamsInternal(TargetWorkerSet targetWorkerSet, const char *
|
|||
List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);
|
||||
|
||||
UseCoordinatedTransaction();
|
||||
CoordinatedTransactionUse2PC();
|
||||
CoordinatedTransactionShouldUse2PC();
|
||||
|
||||
/* open connections in parallel */
|
||||
WorkerNode *workerNode = NULL;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "access/htup_details.h"
|
||||
#include "distributed/distribution_column.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/multi_partitioning_utils.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodes.h"
|
||||
|
@ -115,6 +116,53 @@ column_to_column_name(PG_FUNCTION_ARGS)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* FindColumnWithNameOnTargetRelation gets a source table and
|
||||
 * column name. The function returns the column with the
|
||||
* same name on the target table.
|
||||
*
|
||||
* Note that due to dropping columns, the parent's distribution key may not
|
||||
* match the partition's distribution key. See issue #5123.
|
||||
*
|
||||
* The function throws error if the input or output is not valid or does
|
||||
* not exist.
|
||||
*/
|
||||
Var *
|
||||
FindColumnWithNameOnTargetRelation(Oid sourceRelationId, char *sourceColumnName,
|
||||
Oid targetRelationId)
|
||||
{
|
||||
if (sourceColumnName == NULL || sourceColumnName[0] == '\0')
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN),
|
||||
errmsg("cannot find the given column on table \"%s\"",
|
||||
generate_qualified_relation_name(sourceRelationId))));
|
||||
}
|
||||
|
||||
AttrNumber attributeNumberOnTarget = get_attnum(targetRelationId, sourceColumnName);
|
||||
if (attributeNumberOnTarget == InvalidAttrNumber)
|
||||
{
|
||||
ereport(ERROR, (errmsg("Column \"%s\" does not exist on "
|
||||
"relation \"%s\"", sourceColumnName,
|
||||
get_rel_name(targetRelationId))));
|
||||
}
|
||||
|
||||
Index varNo = 1;
|
||||
Oid targetTypeId = InvalidOid;
|
||||
int32 targetTypMod = 0;
|
||||
Oid targetCollation = InvalidOid;
|
||||
Index varlevelsup = 0;
|
||||
|
||||
/* this function throws error in case anything goes wrong */
|
||||
get_atttypetypmodcoll(targetRelationId, attributeNumberOnTarget,
|
||||
&targetTypeId, &targetTypMod, &targetCollation);
|
||||
Var *targetColumn =
|
||||
makeVar(varNo, attributeNumberOnTarget, targetTypeId, targetTypMod,
|
||||
targetCollation, varlevelsup);
|
||||
|
||||
return targetColumn;
|
||||
}
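
The dropped-column situation mentioned above (issue #5123) can be reproduced roughly as follows; table and column names are illustrative:

CREATE TABLE events (junk int, tenant_id int, created_at date) PARTITION BY RANGE (created_at);
ALTER TABLE events DROP COLUMN junk;   -- attribute numbers of the parent and of new partitions now diverge
SELECT create_distributed_table('events', 'tenant_id');
-- a partition created after the DROP stores tenant_id at a different attnum than the parent does
CREATE TABLE events_2021 PARTITION OF events FOR VALUES FROM ('2021-01-01') TO ('2022-01-01');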
|
||||
|
||||
|
||||
/*
|
||||
* BuildDistributionKeyFromColumnName builds a simple distribution key consisting
|
||||
* only out of a reference to the column of name columnName. Errors out if the
|
||||
|
|
|
@ -100,9 +100,6 @@ static ForeignConstraintRelationshipNode * CreateOrFindNode(HTAB *adjacencyLists
|
|||
relid);
|
||||
static List * GetConnectedListHelper(ForeignConstraintRelationshipNode *node,
|
||||
bool isReferencing);
|
||||
static HTAB * CreateOidVisitedHashSet(void);
|
||||
static bool OidVisited(HTAB *oidVisitedMap, Oid oid);
|
||||
static void VisitOid(HTAB *oidVisitedMap, Oid oid);
|
||||
static List * GetForeignConstraintRelationshipHelper(Oid relationId, bool isReferencing);
|
||||
|
||||
|
||||
|
@ -442,7 +439,7 @@ GetConnectedListHelper(ForeignConstraintRelationshipNode *node, bool isReferenci
|
|||
* As hash_create allocates memory in heap, callers are responsible to call
|
||||
* hash_destroy when appropriate.
|
||||
*/
|
||||
static HTAB *
|
||||
HTAB *
|
||||
CreateOidVisitedHashSet(void)
|
||||
{
|
||||
HASHCTL info = { 0 };
|
||||
|
@ -464,7 +461,7 @@ CreateOidVisitedHashSet(void)
|
|||
/*
|
||||
* OidVisited returns true if given oid is visited according to given oid hash-set.
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
OidVisited(HTAB *oidVisitedMap, Oid oid)
|
||||
{
|
||||
bool found = false;
|
||||
|
@ -476,7 +473,7 @@ OidVisited(HTAB *oidVisitedMap, Oid oid)
|
|||
/*
|
||||
* VisitOid sets given oid as visited in given hash-set.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
VisitOid(HTAB *oidVisitedMap, Oid oid)
|
||||
{
|
||||
bool found = false;
|
||||
|
|
|
@ -644,7 +644,8 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
|||
*/
|
||||
lastShardCleanTime = GetCurrentTimestamp();
|
||||
|
||||
numberOfDroppedShards = TryDropMarkedShards();
|
||||
bool waitForCleanupLock = false;
|
||||
numberOfDroppedShards = TryDropMarkedShards(waitForCleanupLock);
|
||||
}
|
||||
|
||||
CommitTransactionCommand();
|
||||
|
|
|
@ -548,13 +548,14 @@ PartitionParentOid(Oid partitionOid)
|
|||
|
||||
|
||||
/*
|
||||
* LongestPartitionName is a uitility function that returns the partition
|
||||
* name which is the longest in terms of number of characters.
|
||||
* PartitionWithLongestNameRelationId is a utility function that returns the
|
||||
* oid of the partition table that has the longest name in terms of number of
|
||||
* characters.
|
||||
*/
|
||||
char *
|
||||
LongestPartitionName(Oid parentRelationId)
|
||||
Oid
|
||||
PartitionWithLongestNameRelationId(Oid parentRelationId)
|
||||
{
|
||||
char *longestName = NULL;
|
||||
Oid longestNamePartitionId = InvalidOid;
|
||||
int longestNameLength = 0;
|
||||
List *partitionList = PartitionList(parentRelationId);
|
||||
|
||||
|
@ -565,12 +566,12 @@ LongestPartitionName(Oid parentRelationId)
|
|||
int partitionNameLength = strnlen(partitionName, NAMEDATALEN);
|
||||
if (partitionNameLength > longestNameLength)
|
||||
{
|
||||
longestName = partitionName;
|
||||
longestNamePartitionId = partitionRelationId;
|
||||
longestNameLength = partitionNameLength;
|
||||
}
|
||||
}
|
||||
|
||||
return longestName;
|
||||
return longestNamePartitionId;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -193,7 +193,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
|
|||
int connectionFlags = OUTSIDE_TRANSACTION;
|
||||
|
||||
MultiConnection *connection = GetNodeUserDatabaseConnection(
|
||||
connectionFlags, "localhost", PostPortNumber,
|
||||
connectionFlags, LocalHostName, PostPortNumber,
|
||||
userName, NULL);
|
||||
|
||||
if (PQstatus(connection->pgConn) == CONNECTION_OK)
|
||||
|
|
|
@ -387,6 +387,37 @@ SetLocktagForShardDistributionMetadata(int64 shardId, LOCKTAG *tag)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* LockPlacementCleanup takes an exclusive lock to ensure that only one process
|
||||
* can cleanup placements at the same time.
|
||||
*/
|
||||
void
|
||||
LockPlacementCleanup(void)
|
||||
{
|
||||
LOCKTAG tag;
|
||||
const bool sessionLock = false;
|
||||
const bool dontWait = false;
|
||||
SET_LOCKTAG_PLACEMENT_CLEANUP(tag);
|
||||
(void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TryLockPlacementCleanup takes an exclusive lock to ensure that only one
|
||||
* process can cleanup placements at the same time.
|
||||
*/
|
||||
bool
|
||||
TryLockPlacementCleanup(void)
|
||||
{
|
||||
LOCKTAG tag;
|
||||
const bool sessionLock = false;
|
||||
const bool dontWait = true;
|
||||
SET_LOCKTAG_PLACEMENT_CLEANUP(tag);
|
||||
bool lockAcquired = LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
|
||||
return lockAcquired;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* LockReferencedReferenceShardDistributionMetadata acquires shard distribution
|
||||
* metadata locks with the given lock mode on the reference tables which has a
|
||||
|
@ -502,8 +533,6 @@ LockShardResource(uint64 shardId, LOCKMODE lockmode)
|
|||
const bool sessionLock = false;
|
||||
const bool dontWait = false;
|
||||
|
||||
AssertArg(shardId != INVALID_SHARD_ID);
|
||||
|
||||
SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId);
|
||||
|
||||
(void) LockAcquire(&tag, lockmode, sessionLock, dontWait);
|
||||
|
|
|
@ -41,7 +41,7 @@ alter_role_if_exists(PG_FUNCTION_ARGS)
|
|||
|
||||
Node *parseTree = ParseTreeNode(utilityQuery);
|
||||
|
||||
ProcessUtilityParseTree(parseTree, utilityQuery, PROCESS_UTILITY_TOPLEVEL, NULL,
|
||||
ProcessUtilityParseTree(parseTree, utilityQuery, PROCESS_UTILITY_QUERY, NULL,
|
||||
None_Receiver, NULL);
|
||||
|
||||
PG_RETURN_BOOL(true);
|
||||
|
@ -98,7 +98,7 @@ worker_create_or_alter_role(PG_FUNCTION_ARGS)
|
|||
|
||||
ProcessUtilityParseTree(parseTree,
|
||||
createRoleUtilityQuery,
|
||||
PROCESS_UTILITY_TOPLEVEL,
|
||||
PROCESS_UTILITY_QUERY,
|
||||
NULL,
|
||||
None_Receiver, NULL);
|
||||
|
||||
|
@ -126,7 +126,7 @@ worker_create_or_alter_role(PG_FUNCTION_ARGS)
|
|||
|
||||
ProcessUtilityParseTree(parseTree,
|
||||
alterRoleUtilityQuery,
|
||||
PROCESS_UTILITY_TOPLEVEL,
|
||||
PROCESS_UTILITY_QUERY,
|
||||
NULL,
|
||||
None_Receiver, NULL);
|
||||
|
||||
|
|
|
@ -11,10 +11,17 @@
|
|||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "miscadmin.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/fmgrprotos.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/metadata_utility.h"
|
||||
#include "distributed/relay_utility.h"
|
||||
#include "distributed/shard_utils.h"
|
||||
|
||||
static int GetLargestShardId(void);
|
||||
|
||||
/*
|
||||
* GetTableLocalShardOid returns the oid of the shard from the given distributed
|
||||
* relation with the shardId.
|
||||
|
@ -36,3 +43,81 @@ GetTableLocalShardOid(Oid citusTableOid, uint64 shardId)
|
|||
|
||||
return shardRelationOid;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetLongestShardName is a utility function that returns the name of the shard of a
|
||||
* table that has the longest name in terms of number of characters.
|
||||
*
|
||||
* Both the Oid and name of the table are required so we can create longest shard names
|
||||
* after a RENAME.
|
||||
*/
|
||||
char *
|
||||
GetLongestShardName(Oid citusTableOid, char *finalRelationName)
|
||||
{
|
||||
char *longestShardName = pstrdup(finalRelationName);
|
||||
ShardInterval *shardInterval = LoadShardIntervalWithLongestShardName(citusTableOid);
|
||||
AppendShardIdToName(&longestShardName, shardInterval->shardId);
|
||||
|
||||
return longestShardName;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetLongestShardNameForLocalPartition is a utility function that creates a hypothetical shard
|
||||
* name for a partition table that is not distributed yet.
|
||||
*/
|
||||
char *
|
||||
GetLongestShardNameForLocalPartition(Oid parentTableOid, char *partitionRelationName)
|
||||
{
|
||||
char *longestShardName = pstrdup(partitionRelationName);
|
||||
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(parentTableOid);
|
||||
int shardIntervalCount = cacheEntry->shardIntervalArrayLength;
|
||||
int newShardId = GetLargestShardId() + shardIntervalCount;
|
||||
AppendShardIdToName(&longestShardName, newShardId);
|
||||
|
||||
return longestShardName;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetLargestShardId returns the biggest shard id, and returns a 10^6 in case of failure
|
||||
* to get the last value from the sequence.
|
||||
*/
|
||||
int
|
||||
GetLargestShardId()
|
||||
{
|
||||
Oid savedUserId = InvalidOid;
|
||||
int savedSecurityContext = 0;
|
||||
|
||||
GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
|
||||
SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
|
||||
|
||||
text *sequenceName = cstring_to_text(SHARDID_SEQUENCE_NAME);
|
||||
Oid sequenceId = ResolveRelationId(sequenceName, false);
|
||||
Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
|
||||
|
||||
volatile int64 largestShardId = 0;
|
||||
|
||||
/*
|
||||
* pg_sequence_last_value() returns NULL if the sequence value is not yet used.
|
||||
* DirectFunctionCall1() gives an ERROR message on NULL return values, and that's why we
|
||||
* need a PG_TRY block.
|
||||
*/
|
||||
PG_TRY();
|
||||
{
|
||||
Datum lastShardIdDatum = DirectFunctionCall1(pg_sequence_last_value,
|
||||
sequenceIdDatum);
|
||||
largestShardId = DatumGetInt64(lastShardIdDatum);
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
/* assume that we have a shardId with 7 digits */
|
||||
largestShardId = 1000000;
|
||||
}
|
||||
PG_END_TRY();
|
||||
|
||||
SetUserIdAndSecContext(savedUserId, savedSecurityContext);
|
||||
|
||||
return largestShardId;
|
||||
}
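
The sequence behavior that the PG_TRY block guards against can be observed directly from SQL (a hedged illustration):

-- returns NULL until pg_dist_shardid_seq has handed out at least one value
SELECT pg_sequence_last_value('pg_catalog.pg_dist_shardid_seq'::regclass);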
|
||||
|
|
|
@ -297,7 +297,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry)
|
|||
ShardInterval **shardIntervalCache = cacheEntry->sortedShardIntervalArray;
|
||||
int shardCount = cacheEntry->shardIntervalArrayLength;
|
||||
FmgrInfo *compareFunction = cacheEntry->shardIntervalCompareFunction;
|
||||
bool useBinarySearch = (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
|
||||
bool useBinarySearch = (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
|
||||
!cacheEntry->hasUniformHashDistribution);
|
||||
int shardIndex = INVALID_SHARD_INDEX;
|
||||
|
||||
|
|
|
@ -111,12 +111,12 @@ worker_create_or_replace_object(PG_FUNCTION_ARGS)
|
|||
RenameStmt *renameStmt = CreateRenameStatement(&address, newName);
|
||||
const char *sqlRenameStmt = DeparseTreeNode((Node *) renameStmt);
|
||||
ProcessUtilityParseTree((Node *) renameStmt, sqlRenameStmt,
|
||||
PROCESS_UTILITY_TOPLEVEL,
|
||||
PROCESS_UTILITY_QUERY,
|
||||
NULL, None_Receiver, NULL);
|
||||
}
|
||||
|
||||
/* apply create statement locally */
|
||||
ProcessUtilityParseTree(parseTree, sqlStatement, PROCESS_UTILITY_TOPLEVEL, NULL,
|
||||
ProcessUtilityParseTree(parseTree, sqlStatement, PROCESS_UTILITY_QUERY, NULL,
|
||||
None_Receiver, NULL);
|
||||
|
||||
/* type has been created */
|
||||
|
|
|
@ -28,13 +28,14 @@
|
|||
#include "commands/extension.h"
|
||||
#include "commands/sequence.h"
|
||||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/commands/multi_copy.h"
|
||||
#include "distributed/commands/utility_hook.h"
|
||||
#include "distributed/connection_management.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/intermediate_results.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/multi_client_executor.h"
|
||||
#include "distributed/commands/multi_copy.h"
|
||||
#include "distributed/multi_logical_optimizer.h"
|
||||
#include "distributed/multi_partitioning_utils.h"
|
||||
#include "distributed/multi_server_executor.h"
|
||||
|
@ -45,6 +46,7 @@
|
|||
#include "distributed/worker_protocol.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "parser/parse_relation.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "tcop/tcopprot.h"
|
||||
#include "tcop/utility.h"
|
||||
|
@ -396,7 +398,7 @@ worker_apply_shard_ddl_command(PG_FUNCTION_ARGS)
|
|||
|
||||
/* extend names in ddl command and apply extended command */
|
||||
RelayEventExtendNames(ddlCommandNode, schemaName, shardId);
|
||||
ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_TOPLEVEL, NULL,
|
||||
ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_QUERY, NULL,
|
||||
None_Receiver, NULL);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
|
@ -428,7 +430,7 @@ worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS)
|
|||
RelayEventExtendNamesForInterShardCommands(ddlCommandNode, leftShardId,
|
||||
leftShardSchemaName, rightShardId,
|
||||
rightShardSchemaName);
|
||||
ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_TOPLEVEL, NULL,
|
||||
ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_QUERY, NULL,
|
||||
None_Receiver, NULL);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
|
@ -461,7 +463,7 @@ worker_apply_sequence_command(PG_FUNCTION_ARGS)
|
|||
}
|
||||
|
||||
/* run the CREATE SEQUENCE command */
|
||||
ProcessUtilityParseTree(commandNode, commandString, PROCESS_UTILITY_TOPLEVEL, NULL,
|
||||
ProcessUtilityParseTree(commandNode, commandString, PROCESS_UTILITY_QUERY, NULL,
|
||||
None_Receiver, NULL);
|
||||
CommandCounterIncrement();
|
||||
|
||||
|
@ -594,9 +596,6 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
|
|||
char *sourceSchemaName = NULL;
|
||||
char *sourceTableName = NULL;
|
||||
|
||||
Oid savedUserId = InvalidOid;
|
||||
int savedSecurityContext = 0;
|
||||
|
||||
CheckCitusVersion(ERROR);
|
||||
|
||||
/* We extract schema names and table names from qualified names */
|
||||
|
@ -613,10 +612,13 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
|
|||
uint64 shardId = ExtractShardIdFromTableName(shardTableName, false);
|
||||
LockShardResource(shardId, AccessExclusiveLock);
|
||||
|
||||
/* copy remote table's data to this node */
|
||||
/*
|
||||
* Copy into intermediate results directory, which is automatically cleaned on
|
||||
* error.
|
||||
*/
|
||||
StringInfo localFilePath = makeStringInfo();
|
||||
appendStringInfo(localFilePath, "base/%s/%s" UINT64_FORMAT,
|
||||
PG_JOB_CACHE_DIR, TABLE_FILE_PREFIX, shardId);
|
||||
appendStringInfo(localFilePath, "%s/worker_append_table_to_shard_" UINT64_FORMAT,
|
||||
CreateIntermediateResultsDirectory(), shardId);
|
||||
|
||||
char *sourceQualifiedName = quote_qualified_identifier(sourceSchemaName,
|
||||
sourceTableName);
|
||||
|
@ -641,7 +643,8 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
|
|||
appendStringInfo(sourceCopyCommand, COPY_OUT_COMMAND, sourceQualifiedName);
|
||||
}
|
||||
|
||||
bool received = ReceiveRegularFile(sourceNodeName, sourceNodePort, NULL,
|
||||
char *userName = CurrentUserName();
|
||||
bool received = ReceiveRegularFile(sourceNodeName, sourceNodePort, userName,
|
||||
sourceCopyCommand,
|
||||
localFilePath);
|
||||
if (!received)
|
||||
|
@ -664,17 +667,36 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
|
|||
/* make sure we are allowed to execute the COPY command */
|
||||
CheckCopyPermissions(localCopyCommand);
|
||||
|
||||
/* need superuser to copy from files */
|
||||
GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
|
||||
SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
|
||||
Relation shardRelation = table_openrv(localCopyCommand->relation, RowExclusiveLock);
|
||||
|
||||
ProcessUtilityParseTree((Node *) localCopyCommand, queryString->data,
|
||||
PROCESS_UTILITY_TOPLEVEL, NULL, None_Receiver, NULL);
|
||||
/* mimic check from copy.c */
|
||||
if (XactReadOnly && !shardRelation->rd_islocaltemp)
|
||||
{
|
||||
PreventCommandIfReadOnly("COPY FROM");
|
||||
}
|
||||
|
||||
SetUserIdAndSecContext(savedUserId, savedSecurityContext);
|
||||
ParseState *parseState = make_parsestate(NULL);
|
||||
(void) addRangeTableEntryForRelation(parseState, shardRelation,
|
||||
#if PG_VERSION_NUM >= PG_VERSION_12
|
||||
RowExclusiveLock,
|
||||
#endif
|
||||
NULL, false, false);
|
||||
|
||||
CopyState copyState = BeginCopyFrom(parseState,
|
||||
shardRelation,
|
||||
localCopyCommand->filename,
|
||||
localCopyCommand->is_program,
|
||||
NULL,
|
||||
localCopyCommand->attlist,
|
||||
localCopyCommand->options);
|
||||
CopyFrom(copyState);
|
||||
EndCopyFrom(copyState);
|
||||
|
||||
free_parsestate(parseState);
|
||||
|
||||
/* finally delete the temporary file we created */
|
||||
CitusDeleteFile(localFilePath->data);
|
||||
table_close(shardRelation, NoLock);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
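
worker_append_table_to_shard is normally invoked by the coordinator rather than by hand; for reference, a direct call would look roughly like the sketch below, with the argument order (shard table, source table, source node, source port) inferred from the surrounding code and all values illustrative:

SELECT worker_append_table_to_shard('append_table_102009', 'staging.raw_events', 'source-node-1', 5432);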
|
||||
|
@ -782,7 +804,7 @@ AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequenceName,
|
|||
|
||||
/* since the command is an AlterSeqStmt, a dummy command string works fine */
|
||||
ProcessUtilityParseTree((Node *) alterSequenceStatement, dummyString,
|
||||
PROCESS_UTILITY_TOPLEVEL, NULL, None_Receiver, NULL);
|
||||
PROCESS_UTILITY_QUERY, NULL, None_Receiver, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
|
||||
#include "executor/spi.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "parser/parse_relation.h"
|
||||
#include "parser/parse_type.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/acl.h"
|
||||
|
@ -183,8 +184,6 @@ worker_merge_files_into_table(PG_FUNCTION_ARGS)
|
|||
StringInfo jobSchemaName = JobSchemaName(jobId);
|
||||
StringInfo taskTableName = TaskTableName(taskId);
|
||||
StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId);
|
||||
Oid savedUserId = InvalidOid;
|
||||
int savedSecurityContext = 0;
|
||||
Oid userId = GetUserId();
|
||||
|
||||
/* we should have the same number of column names and types */
|
||||
|
@ -233,14 +232,9 @@ worker_merge_files_into_table(PG_FUNCTION_ARGS)
|
|||
|
||||
CreateTaskTable(jobSchemaName, taskTableName, columnNameList, columnTypeList);
|
||||
|
||||
/* need superuser to copy from files */
|
||||
GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
|
||||
SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
|
||||
|
||||
CopyTaskFilesFromDirectory(jobSchemaName, taskTableName, taskDirectoryName,
|
||||
userId);
|
||||
|
||||
SetUserIdAndSecContext(savedUserId, savedSecurityContext);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
@ -569,8 +563,8 @@ CopyTaskFilesFromDirectory(StringInfo schemaName, StringInfo relationName,
|
|||
appendStringInfo(fullFilename, "%s/%s", directoryName, baseFilename);
|
||||
|
||||
/* build relation object and copy statement */
|
||||
RangeVar *relation = makeRangeVar(schemaName->data, relationName->data, -1);
|
||||
CopyStmt *copyStatement = CopyStatement(relation, fullFilename->data);
|
||||
RangeVar *rangeVar = makeRangeVar(schemaName->data, relationName->data, -1);
|
||||
CopyStmt *copyStatement = CopyStatement(rangeVar, fullFilename->data);
|
||||
if (BinaryWorkerCopyFormat)
|
||||
{
|
||||
DefElem *copyOption = makeDefElem("format", (Node *) makeString("binary"),
|
||||
|
@ -579,12 +573,28 @@ CopyTaskFilesFromDirectory(StringInfo schemaName, StringInfo relationName,
|
|||
}
|
||||
|
||||
{
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
pstate->p_sourcetext = queryString;
|
||||
ParseState *parseState = make_parsestate(NULL);
|
||||
parseState->p_sourcetext = queryString;
|
||||
|
||||
DoCopy(pstate, copyStatement, -1, -1, &copiedRowCount);
|
||||
Relation relation = table_openrv(rangeVar, RowExclusiveLock);
|
||||
(void) addRangeTableEntryForRelation(parseState, relation,
|
||||
#if PG_VERSION_NUM >= PG_VERSION_12
|
||||
RowExclusiveLock,
|
||||
#endif
|
||||
NULL, false, false);
|
||||
|
||||
free_parsestate(pstate);
|
||||
CopyState copyState = BeginCopyFrom(parseState,
|
||||
relation,
|
||||
copyStatement->filename,
|
||||
copyStatement->is_program,
|
||||
NULL,
|
||||
copyStatement->attlist,
|
||||
copyStatement->options);
|
||||
copiedRowCount = CopyFrom(copyState);
|
||||
EndCopyFrom(copyState);
|
||||
|
||||
free_parsestate(parseState);
|
||||
table_close(relation, NoLock);
|
||||
}
|
||||
|
||||
copiedRowTotal += copiedRowCount;
|
||||
|
|
|
@ -24,6 +24,14 @@
|
|||
/* controlled via GUC, should be accessed via EnableLocalReferenceForeignKeys() */
|
||||
extern bool EnableLocalReferenceForeignKeys;
|
||||
|
||||
extern void SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(Oid relationId,
|
||||
char *
|
||||
finalRelationName);
|
||||
extern void SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(Oid
|
||||
parentRelationId,
|
||||
Oid
|
||||
partitionRelationId);
|
||||
|
||||
|
||||
/*
|
||||
* DistributeObjectOps specifies handlers for node/object type pairs.
|
||||
|
|
|
@ -200,8 +200,12 @@ extern int NodeConnectionTimeout;
|
|||
/* maximum number of connections to cache per worker per session */
|
||||
extern int MaxCachedConnectionsPerWorker;
|
||||
|
||||
/* maximum lifetime of connections in milliseconds */
|
||||
extern int MaxCachedConnectionLifetime;
|
||||
|
||||
/* parameters used for outbound connections */
|
||||
extern char *NodeConninfo;
|
||||
extern char *LocalHostName;
|
||||
|
||||
/* the hash tables are externally accessible */
|
||||
extern HTAB *ConnectionHash;
|
||||
|
@ -258,4 +262,5 @@ extern bool IsCitusInitiatedRemoteBackend(void);
|
|||
extern double MillisecondsPassedSince(instr_time moment);
|
||||
extern long MillisecondsToTimeout(instr_time start, long msAfterStart);
|
||||
|
||||
extern void WarmUpConnParamsHash(void);
|
||||
#endif /* CONNECTION_MANAGMENT_H */
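
The new MaxCachedConnectionLifetime variable declared above is exposed as a GUC; assuming the usual citus.* naming (citus.max_cached_connection_lifetime), it can be tuned per session or in postgresql.conf, for example:

SET citus.max_cached_connection_lifetime TO '10min';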
|
||||
|
|
|
@ -28,7 +28,8 @@ extern void SetTaskQueryString(Task *task, char *queryString);
|
|||
extern void SetTaskQueryStringList(Task *task, List *queryStringList);
|
||||
extern char * TaskQueryString(Task *task);
|
||||
extern char * TaskQueryStringAtIndex(Task *task, int index);
|
||||
extern bool UpdateRelationsToLocalShardTables(Node *node, List *relationShardList);
|
||||
extern int GetTaskQueryType(Task *task);
|
||||
extern void AddInsertAliasIfNeeded(Query *query);
|
||||
|
||||
|
||||
#endif /* DEPARSE_SHARD_QUERY_H */
|
||||
|
|
|
@ -67,6 +67,9 @@ typedef struct RelationRestriction
|
|||
|
||||
/* list of RootPlanParams for all outer nodes */
|
||||
List *outerPlanParamsList;
|
||||
|
||||
/* list of translated vars, this is copied from postgres since it gets deleted on postgres*/
|
||||
List *translatedVars;
|
||||
} RelationRestriction;
|
||||
|
||||
typedef struct JoinRestrictionContext
|
||||
|
@ -219,9 +222,9 @@ extern PlannedStmt * distributed_planner(Query *parse,
|
|||
#define LOCAL_TABLE_SUBQUERY_CTE_HINT \
|
||||
"Use CTE's or subqueries to select from local tables and use them in joins"
|
||||
|
||||
|
||||
extern List * ExtractRangeTableEntryList(Query *query);
|
||||
extern bool NeedsDistributedPlanning(Query *query);
|
||||
extern List * TranslatedVarsForRteIdentity(int rteIdentity);
|
||||
extern struct DistributedPlan * GetDistributedPlan(CustomScan *node);
|
||||
extern void multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
|
||||
Index restrictionIndex, RangeTblEntry *rte);
|
||||
|
@ -238,6 +241,7 @@ extern Node * ResolveExternalParams(Node *inputNode, ParamListInfo boundParams);
|
|||
extern bool IsMultiTaskPlan(struct DistributedPlan *distributedPlan);
|
||||
extern RangeTblEntry * RemoteScanRangeTableEntry(List *columnNameList);
|
||||
extern int GetRTEIdentity(RangeTblEntry *rte);
|
||||
extern bool GetOriginalInh(RangeTblEntry *rte);
|
||||
extern LOCKMODE GetQueryLockMode(Query *query);
|
||||
extern int32 BlessRecordExpression(Expr *expr);
|
||||
extern void DissuadePlannerFromUsingPlan(PlannedStmt *plan);
|
||||
|
|
|
@ -19,6 +19,9 @@
|
|||
|
||||
|
||||
/* Remaining metadata utility functions */
|
||||
extern Var * FindColumnWithNameOnTargetRelation(Oid sourceRelationId,
|
||||
char *sourceColumnName,
|
||||
Oid targetRelationId);
|
||||
extern Var * BuildDistributionKeyFromColumnName(Relation distributedRelation,
|
||||
char *columnName);
|
||||
extern char * ColumnToColumnName(Oid relationId, char *columnNodeString);
|
||||
|
|