mirror of https://github.com/citusdata/citus.git
Compare commits
95 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 11db00990f | |
| | bd5544fdc5 | |
| | e5e50570b3 | |
| | 758cda1394 | |
| | 8b4bab14a2 | |
| | 115f2c124a | |
| | 4b7af5aaaf | |
| | 6b87b3ea27 | |
| | d13b989cff | |
| | 0f62f1a93a | |
| | 83585e32f9 | |
| | 0f1f55c287 | |
| | 3e8348c29e | |
| | 537618aaed | |
| | 6c989830d2 | |
| | 259511746e | |
| | bd245b5fbb | |
| | 25c71fb3d0 | |
| | 28a503fad9 | |
| | 30b46975b8 | |
| | 5f5e5ef471 | |
| | 5a1036e361 | |
| | 6de2a09d79 | |
| | d485003807 | |
| | 32124efd83 | |
| | c84d1d9e70 | |
| | b46f8874d3 | |
| | 1492bd1e8b | |
| | 4082fab0c9 | |
| | 4ca544200c | |
| | e58b78f1e8 | |
| | f526eec6a8 | |
| | 5759233f15 | |
| | 6640c76bde | |
| | 11d5d21fd8 | |
| | 4fbed90505 | |
| | 7214673a9f | |
| | 79a274e226 | |
| | dd2dfac198 | |
| | 3bcfadf2f1 | |
| | f5a7858ab9 | |
| | d7b90e0804 | |
| | 74985a0977 | |
| | 57a52b01a2 | |
| | c24088e12f | |
| | 9a2227c70d | |
| | 826ac1b099 | |
| | d9514fa697 | |
| | 2f27325b15 | |
| | f41b5060f0 | |
| | 823ede78ab | |
| | 2ea3618f22 | |
| | 88825b89a1 | |
| | a216c6b62c | |
| | fcb932268a | |
| | 1200c8fd1c | |
| | 0237d826d5 | |
| | e54b253713 | |
| | 61efc87c53 | |
| | f5608c2769 | |
| | ecf0f2fdbf | |
| | 0a09551dab | |
| | 0805ef9c79 | |
| | a6435b7f6b | |
| | f13cf336f2 | |
| | 46e316881b | |
| | 18ab327c6c | |
| | 61a89c69cd | |
| | ad9469b351 | |
| | 4121788848 | |
| | e9bf5fa235 | |
| | 18c7a3c188 | |
| | 85a87af11c | |
| | 115fa950d3 | |
| | 445291d94b | |
| | 28f1c2129d | |
| | 205b8ec70a | |
| | 6fa25d73be | |
| | bfb1ca6d0d | |
| | b355f0d9a2 | |
| | fdcb6ead43 | |
| | 3fcb011b67 | |
| | 8228815b38 | |
| | 270234c7ff | |
| | 3131d3e3c5 | |
| | a7f9dfc3f0 | |
| | 049cd55346 | |
| | 27ecb5cde2 | |
| | fc08ec203f | |
| | 495470d291 | |
| | 39a142b4d9 | |
| | ca4b529751 | |
| | e48f5d804d | |
| | 85e2c6b523 | |
| | 2a390b4c1d | |

@@ -365,7 +365,7 @@ jobs:
       when: on_fail
   - store_artifacts:
       name: 'Save tap logs'
-      path: /home/circleci/project/src/test/recovery/tmp_check/log
+      path: /home/circleci/project/src/test/<< parameters.suite >>/tmp_check/log
       when: on_fail
   - store_artifacts:
       name: 'Save core dumps'

@@ -552,6 +552,12 @@ workflows:
       image_tag: '12.4'
       suite: recovery
       requires: [build-12]
+  - tap-test-citus:
+      name: 'test-12_tap-columnar-freezing'
+      pg_major: 12
+      image_tag: '12.4'
+      suite: columnar_freezing
+      requires: [build-12]
   - test-citus:
       name: 'test-12_check-failure'
       pg_major: 12

@@ -620,6 +626,12 @@ workflows:
       image_tag: '13.0'
       suite: recovery
       requires: [build-13]
+  - tap-test-citus:
+      name: 'test-13_tap-columnar-freezing'
+      pg_major: 13
+      image_tag: '13.0'
+      suite: columnar_freezing
+      requires: [build-13]
   - test-citus:
       name: 'test-13_check-failure'
       pg_major: 13
126 CHANGELOG.md

@@ -1,3 +1,129 @@
+### citus v10.0.8 (April 20, 2023) ###
+
+* Fixes a bug that could break `DROP SCHEMA/EXTENSION` commands when there is a
+  columnar table (#5458)
+
+* Fixes a crash that occurs when an aggregate that cannot be pushed down
+  returns an empty result from a worker (#5679)
+
+* Fixes columnar freezing/wraparound bug (#5962)
+
+* Fixes a memory leak issue with query results that return a single row (#6724)
+
+* Prevents alter table functions from dropping extensions (#5974)
+
+### citus v10.0.6 (November 12, 2021) ###
+
+* Adds missing version checks for columnar tables
+
+* Fixes a bug that caused `worker_append_table_to_shard` to write as superuser
+
+* Fixes a bug with local cached plans on tables with dropped columns
+
+* Fixes a missing `FROM` clause entry error
+
+* Fixes a use after free issue that could happen when altering a distributed
+  table
+
+* Reinstates optimisation for uniform shard interval ranges
+
+### citus v10.0.5 (August 16, 2021) ###
+
+* Allows more graceful failovers when replication factor > 1
+
+* Fixes a bug that causes partitions to have wrong distribution key after
+  `DROP COLUMN`
+
+* Improves citus_update_table_statistics and provides distributed deadlock
+  detection
+
+### citus v10.0.4 (July 14, 2021) ###
+
+* Introduces `citus.local_hostname` GUC for connections to the current node
+
+* Removes dependencies on the existence of public schema
+
+* Removes limits around long partition names
+
+* Fixes a bug that can cause a crash when DEBUG4 logging is enabled
+
+* Fixes a bug that causes pruning incorrect shard of a range distributed table
+
+* Fixes an issue that could cause citus_finish_pg_upgrade to fail
+
+* Fixes FROM ONLY queries on partitioned tables
+
+* Fixes issues caused by public schema being omitted in queries
+
+* Fixes problems with concurrent calls of DropMarkedShards
+
+* Fixes relname null bug when using parallel execution
+
+* Fixes two race conditions in the get_rebalance_progress
+
+### citus v10.0.3 (March 16, 2021) ###
+
+* Prevents infinite recursion for queries that involve `UNION ALL`
+  below `JOIN`
+
+* Fixes a crash in queries with a modifying `CTE` and a `SELECT`
+  without `FROM`
+
+* Fixes upgrade and downgrade paths for `citus_update_table_statistics`
+
+* Fixes a bug that causes `SELECT` queries to use 2PC unnecessarily
+
+* Fixes a bug that might cause self-deadlocks with
+  `CREATE INDEX` / `REINDEX CONCURRENTLY` commands
+
+* Adds `citus.max_cached_connection_lifetime` GUC to set maximum connection
+  lifetime
+
+* Adds `citus.remote_copy_flush_threshold` GUC that controls
+  per-shard memory usages by `COPY`
+
+* Adds `citus_get_active_worker_nodes` UDF to deprecate
+  `master_get_active_worker_nodes`
+
+* Skips 2PC for readonly connections in a transaction
+
+* Makes sure that local execution starts coordinated transaction
+
+* Removes open temporary file warning when cancelling a query with
+  an open tuple store
+
+* Relaxes the locks when adding an existing node
+
+### citus v10.0.2 (March 3, 2021) ###
+
+* Adds a configure flag to enforce security
+
+* Fixes a bug due to cross join without target list
+
+* Fixes a bug with `UNION ALL` on PG 13
+
+* Fixes a compatibility issue with pg_audit in utility calls
+
+* Fixes insert query with CTEs/sublinks/subqueries etc
+
+* Grants `SELECT` permission on `citus_tables` view to `public`
+
+* Grants `SELECT` permission on columnar metadata tables to `public`
+
+* Improves `citus_update_table_statistics` and provides distributed deadlock
+  detection
+
+* Preserves colocation with procedures in `alter_distributed_table`
+
+* Prevents using `alter_columnar_table_set` and `alter_columnar_table_reset`
+  on a columnar table not owned by the user
+
+* Removes limits around long table names
+
+### citus v10.0.1 (February 19, 2021) ###
+
+* Fixes an issue in creation of `pg_catalog.time_partitions` view
+
 ### citus v10.0.0 (February 16, 2021) ###

 * Adds support for per-table option for columnar storage
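A quick, hedged illustration of the two GUCs and the UDF called out in the v10.0.3 notes above; the values shown and the assumption that both GUCs are settable at session level are examples for illustration, not recommendations from the changelog.

```sql
-- Cap the lifetime of cached worker connections (GUC added in v10.0.3).
SET citus.max_cached_connection_lifetime TO '10min';

-- Bound per-shard COPY buffering before flushing to workers (GUC added in v10.0.3).
SET citus.remote_copy_flush_threshold TO '8MB';

-- UDF introduced to deprecate master_get_active_worker_nodes.
SELECT * FROM citus_get_active_worker_nodes();
```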

@@ -86,6 +86,7 @@ endif

 # Add options passed to configure or computed therein, to CFLAGS/CPPFLAGS/...
 override CFLAGS += @CFLAGS@ @CITUS_CFLAGS@
+override BITCODE_CFLAGS := $(BITCODE_CFLAGS) @CITUS_BITCODE_CFLAGS@
 ifneq ($(GIT_VERSION),)
 override CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\"
 endif
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for Citus 10.0devel.
+# Generated by GNU Autoconf 2.69 for Citus 10.0.8.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.

@@ -579,8 +579,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='Citus'
 PACKAGE_TARNAME='citus'
-PACKAGE_VERSION='10.0devel'
-PACKAGE_STRING='Citus 10.0devel'
+PACKAGE_VERSION='10.0.8'
+PACKAGE_STRING='Citus 10.0.8'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -628,8 +628,10 @@ POSTGRES_BUILDDIR
 POSTGRES_SRCDIR
 CITUS_LDFLAGS
 CITUS_CPPFLAGS
+CITUS_BITCODE_CFLAGS
 CITUS_CFLAGS
 GIT_BIN
+with_security_flags
 with_zstd
 with_lz4
 EGREP

@@ -696,6 +698,7 @@ with_libcurl
 with_reports_hostname
 with_lz4
 with_zstd
+with_security_flags
 '
 ac_precious_vars='build_alias
 host_alias

@@ -1258,7 +1261,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures Citus 10.0devel to adapt to many kinds of systems.
+\`configure' configures Citus 10.0.8 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1320,7 +1323,7 @@ fi

 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of Citus 10.0devel:";;
+     short | recursive ) echo "Configuration of Citus 10.0.8:";;
   esac
   cat <<\_ACEOF

@@ -1342,6 +1345,7 @@ Optional Packages:
                           and update checks
   --without-lz4           do not use lz4
   --without-zstd          do not use zstd
+  --with-security-flags   use security flags

 Some influential environment variables:
   PG_CONFIG Location to find pg_config for target PostgreSQL instalation

@@ -1422,7 +1426,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-Citus configure 10.0devel
+Citus configure 10.0.8
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.

@@ -1905,7 +1909,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by Citus $as_me 10.0devel, which was
+It was created by Citus $as_me 10.0.8, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

   $ $0 $@

@@ -4346,6 +4350,48 @@ if test x"$citusac_cv_prog_cc_cflags__Werror_return_type" = x"yes"; then
   CITUS_CFLAGS="$CITUS_CFLAGS -Werror=return-type"
 fi

+# Security flags
+# Flags taken from: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10203#guide
+# We do not enforce the following flag because it is only available on GCC>=8
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC supports -fstack-clash-protection" >&5
+$as_echo_n "checking whether $CC supports -fstack-clash-protection... " >&6; }
+if ${citusac_cv_prog_cc_cflags__fstack_clash_protection+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  citusac_save_CFLAGS=$CFLAGS
+  flag=-fstack-clash-protection
+  case $flag in -Wno*)
+    flag=-W$(echo $flag | cut -c 6-)
+  esac
+  CFLAGS="$citusac_save_CFLAGS $flag"
+  ac_save_c_werror_flag=$ac_c_werror_flag
+  ac_c_werror_flag=yes
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  citusac_cv_prog_cc_cflags__fstack_clash_protection=yes
+else
+  citusac_cv_prog_cc_cflags__fstack_clash_protection=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  ac_c_werror_flag=$ac_save_c_werror_flag
+  CFLAGS="$citusac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $citusac_cv_prog_cc_cflags__fstack_clash_protection" >&5
+$as_echo "$citusac_cv_prog_cc_cflags__fstack_clash_protection" >&6; }
+if test x"$citusac_cv_prog_cc_cflags__fstack_clash_protection" = x"yes"; then
+  CITUS_CFLAGS="$CITUS_CFLAGS -fstack-clash-protection"
+fi
+

 #
 # --enable-coverage enables generation of code coverage metrics with gcov

@@ -4493,8 +4539,8 @@ if test "$version_num" != '11'; then
 $as_echo "#define HAS_TABLEAM 1" >>confdefs.h

 else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: postgres version does not support table access methodds" >&5
-$as_echo "$as_me: postgres version does not support table access methodds" >&6;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: postgres version does not support table access methods" >&5
+$as_echo "$as_me: postgres version does not support table access methods" >&6;}
 fi;

 # Require lz4 & zstd only if we are compiling columnar

@@ -4687,6 +4733,55 @@ fi

 fi # test "$HAS_TABLEAM" == 'yes'

+
+
+
+# Check whether --with-security-flags was given.
+if test "${with_security_flags+set}" = set; then :
+  withval=$with_security_flags;
+  case $withval in
+    yes)
+      :
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-security-flags option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_security_flags=no
+
+fi
+
+
+
+
+if test "$with_security_flags" = yes; then
+# Flags taken from: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10203#guide
+
+# We always want to have some compiler flags for security concerns.
+SECURITY_CFLAGS="-fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -z noexecstack -fpic -shared -Wl,-z,relro -Wl,-z,now -Wformat -Wformat-security -Werror=format-security"
+CITUS_CFLAGS="$CITUS_CFLAGS $SECURITY_CFLAGS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: Blindly added security flags for linker: $SECURITY_CFLAGS" >&5
+$as_echo "$as_me: Blindly added security flags for linker: $SECURITY_CFLAGS" >&6;}
+
+# We always want to have some clang flags for security concerns.
+# This doesn't include "-Wl,-z,relro -Wl,-z,now" on purpuse, because bitcode is not linked.
+# This doesn't include -fsanitize=cfi because it breaks builds on many distros including
+# Debian/Buster, Debian/Stretch, Ubuntu/Bionic, Ubuntu/Xenial and EL7.
+SECURITY_BITCODE_CFLAGS="-fsanitize=safe-stack -fstack-protector-strong -flto -fPIC -Wformat -Wformat-security -Werror=format-security"
+CITUS_BITCODE_CFLAGS="$CITUS_BITCODE_CFLAGS $SECURITY_BITCODE_CFLAGS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: Blindly added security flags for llvm: $SECURITY_BITCODE_CFLAGS" >&5
+$as_echo "$as_me: Blindly added security flags for llvm: $SECURITY_BITCODE_CFLAGS" >&6;}
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: If you run into issues during linking or bitcode compilation, you can use --without-security-flags." >&5
+$as_echo "$as_me: WARNING: If you run into issues during linking or bitcode compilation, you can use --without-security-flags." >&2;}
+fi
+
 # Check if git is installed, when installed the gitref of the checkout will be baked in the application
 # Extract the first word of "git", so it can be a program name with args.
 set dummy git; ac_word=$2

@@ -4752,6 +4847,8 @@ fi

 CITUS_CFLAGS="$CITUS_CFLAGS"

+CITUS_BITCODE_CFLAGS="$CITUS_BITCODE_CFLAGS"
+
 CITUS_CPPFLAGS="$CITUS_CPPFLAGS"

 CITUS_LDFLAGS="$LIBS $CITUS_LDFLAGS"

@@ -5276,7 +5373,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by Citus $as_me 10.0devel, which was
+This file was extended by Citus $as_me 10.0.8, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

   CONFIG_FILES    = $CONFIG_FILES

@@ -5338,7 +5435,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-Citus config.status 10.0devel
+Citus config.status 10.0.8
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
33 configure.in

@@ -5,7 +5,7 @@
 # everyone needing autoconf installed, the resulting files are checked
 # into the SCM.

-AC_INIT([Citus], [10.0devel])
+AC_INIT([Citus], [10.0.8])
 AC_COPYRIGHT([Copyright (c) Citus Data, Inc.])

 # we'll need sed and awk for some of the version commands

@@ -174,6 +174,10 @@ CITUSAC_PROG_CC_CFLAGS_OPT([-Werror=vla]) # visual studio does not support thes
 CITUSAC_PROG_CC_CFLAGS_OPT([-Werror=implicit-int])
 CITUSAC_PROG_CC_CFLAGS_OPT([-Werror=implicit-function-declaration])
 CITUSAC_PROG_CC_CFLAGS_OPT([-Werror=return-type])
+# Security flags
+# Flags taken from: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10203#guide
+# We do not enforce the following flag because it is only available on GCC>=8
+CITUSAC_PROG_CC_CFLAGS_OPT([-fstack-clash-protection])

 #
 # --enable-coverage enables generation of code coverage metrics with gcov

@@ -216,7 +220,7 @@ if test "$version_num" != '11'; then
   HAS_TABLEAM=yes
   AC_DEFINE([HAS_TABLEAM], 1, [Define to 1 to build with table access method support, pg12 and up])
 else
-  AC_MSG_NOTICE([postgres version does not support table access methodds])
+  AC_MSG_NOTICE([postgres version does not support table access methods])
 fi;

 # Require lz4 & zstd only if we are compiling columnar

@@ -261,11 +265,36 @@ if test "$HAS_TABLEAM" == 'yes'; then

 fi # test "$HAS_TABLEAM" == 'yes'

+
+PGAC_ARG_BOOL(with, security-flags, no,
+              [use security flags])
+AC_SUBST(with_security_flags)
+
+if test "$with_security_flags" = yes; then
+# Flags taken from: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10203#guide
+
+# We always want to have some compiler flags for security concerns.
+SECURITY_CFLAGS="-fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -z noexecstack -fpic -shared -Wl,-z,relro -Wl,-z,now -Wformat -Wformat-security -Werror=format-security"
+CITUS_CFLAGS="$CITUS_CFLAGS $SECURITY_CFLAGS"
+AC_MSG_NOTICE([Blindly added security flags for linker: $SECURITY_CFLAGS])
+
+# We always want to have some clang flags for security concerns.
+# This doesn't include "-Wl,-z,relro -Wl,-z,now" on purpuse, because bitcode is not linked.
+# This doesn't include -fsanitize=cfi because it breaks builds on many distros including
+# Debian/Buster, Debian/Stretch, Ubuntu/Bionic, Ubuntu/Xenial and EL7.
+SECURITY_BITCODE_CFLAGS="-fsanitize=safe-stack -fstack-protector-strong -flto -fPIC -Wformat -Wformat-security -Werror=format-security"
+CITUS_BITCODE_CFLAGS="$CITUS_BITCODE_CFLAGS $SECURITY_BITCODE_CFLAGS"
+AC_MSG_NOTICE([Blindly added security flags for llvm: $SECURITY_BITCODE_CFLAGS])
+
+AC_MSG_WARN([If you run into issues during linking or bitcode compilation, you can use --without-security-flags.])
+fi
+
 # Check if git is installed, when installed the gitref of the checkout will be baked in the application
 AC_PATH_PROG(GIT_BIN, git)
 AC_CHECK_FILE(.git,[HAS_DOTGIT=yes], [HAS_DOTGIT=])

 AC_SUBST(CITUS_CFLAGS, "$CITUS_CFLAGS")
+AC_SUBST(CITUS_BITCODE_CFLAGS, "$CITUS_BITCODE_CFLAGS")
 AC_SUBST(CITUS_CPPFLAGS, "$CITUS_CPPFLAGS")
 AC_SUBST(CITUS_LDFLAGS, "$LIBS $CITUS_LDFLAGS")
 AC_SUBST(POSTGRES_SRCDIR, "$POSTGRES_SRCDIR")
@@ -311,8 +311,13 @@ DeleteColumnarTableOptions(Oid regclass, bool missingOk)
     */
    Assert(!IsBinaryUpgrade);

-   Relation columnarOptions = relation_open(ColumnarOptionsRelationId(),
+   Relation columnarOptions = try_relation_open(ColumnarOptionsRelationId(),
                                             RowExclusiveLock);
+   if (columnarOptions == NULL)
+   {
+       /* extension has been dropped */
+       return false;
+   }

    /* find existing item to remove */
    ScanKeyData scanKey[1] = { 0 };

@@ -1087,7 +1092,11 @@ DatumToBytea(Datum value, Form_pg_attribute attrForm)
 {
    if (attrForm->attbyval)
    {
-       store_att_byval(VARDATA(result), value, attrForm->attlen);
+       Datum tmp;
+       store_att_byval(&tmp, value, attrForm->attlen);
+
+       memcpy_s(VARDATA(result), datumLength + VARHDRSZ,
+                &tmp, attrForm->attlen);
    }
    else
    {
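The switch to try_relation_open above is the `DROP SCHEMA/EXTENSION` fix from the changelog: if the columnar options catalog is already gone mid-drop, the metadata delete is skipped instead of erroring. A minimal sketch of the scenario, using a hypothetical table name:

```sql
-- Hypothetical columnar table; 'columnar' is the table access method
-- shipped with the Citus extension.
CREATE TABLE events (id bigint, payload text) USING columnar;

-- With the fix, the cascaded drop of the columnar table no longer fails
-- while cleaning up per-table columnar options metadata.
DROP EXTENSION citus CASCADE;
```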
@@ -29,6 +29,7 @@
 #else
 #include "optimizer/clauses.h"
 #include "optimizer/predtest.h"
+#include "optimizer/var.h"
 #endif
 #include "optimizer/restrictinfo.h"
 #include "storage/fd.h"

@@ -62,6 +63,8 @@ struct TableReadState
    List *projectedColumnList;

    List *whereClauseList;
+   List *whereClauseVars;
+
    MemoryContext stripeReadContext;
    StripeBuffers *stripeBuffers;
    uint32 readStripeCount;

@@ -77,6 +80,7 @@ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation,
                                                 TupleDesc tupleDescriptor,
                                                 List *projectedColumnList,
                                                 List *whereClauseList,
+                                                List *whereClauseVars,
                                                 int64 *chunkGroupsFiltered);
 static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList,
                              uint64 chunkIndex, uint64 chunkRowIndex,

@@ -87,10 +91,11 @@ static ColumnBuffers * LoadColumnBuffers(Relation relation,
                                         uint32 chunkCount, uint64 stripeOffset,
                                         Form_pg_attribute attributeForm);
 static bool * SelectedChunkMask(StripeSkipList *stripeSkipList,
-                               List *projectedColumnList, List *whereClauseList,
+                               List *whereClauseList, List *whereClauseVars,
                                int64 *chunkGroupsFiltered);
 static List * BuildRestrictInfoList(List *whereClauseList);
 static Node * BuildBaseConstraint(Var *variable);
+static List * GetClauseVars(List *clauses, int natts);
 static OpExpr * MakeOpExpression(Var *variable, int16 strategyNumber);
 static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber);
 static void UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue);

@@ -142,6 +147,7 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
    readState->stripeList = stripeList;
    readState->projectedColumnList = projectedColumnList;
    readState->whereClauseList = whereClauseList;
+   readState->whereClauseVars = GetClauseVars(whereClauseList, tupleDescriptor->natts);
    readState->stripeBuffers = NULL;
    readState->readStripeCount = 0;
    readState->stripeReadRowCount = 0;

@@ -218,6 +224,8 @@ ColumnarReadNextRow(TableReadState *readState, Datum *columnValues, bool *column
                                                      projectedColumnList,
                                                      readState->
                                                      whereClauseList,
+                                                     readState->
+                                                     whereClauseVars,
                                                      &readState->
                                                      chunkGroupsFiltered);
            readState->readStripeCount++;

@@ -400,7 +408,8 @@ ColumnarTableRowCount(Relation relation)
 static StripeBuffers *
 LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
                          TupleDesc tupleDescriptor, List *projectedColumnList,
-                         List *whereClauseList, int64 *chunkGroupsFiltered)
+                         List *whereClauseList, List *whereClauseVars,
+                         int64 *chunkGroupsFiltered)
 {
    uint32 columnIndex = 0;
    uint32 columnCount = tupleDescriptor->natts;

@@ -412,8 +421,8 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
                                                      tupleDescriptor,
                                                      stripeMetadata->chunkCount);

-   bool *selectedChunkMask = SelectedChunkMask(stripeSkipList, projectedColumnList,
-                                               whereClauseList, chunkGroupsFiltered);
+   bool *selectedChunkMask = SelectedChunkMask(stripeSkipList, whereClauseList,
+                                               whereClauseVars, chunkGroupsFiltered);

    StripeSkipList *selectedChunkSkipList =
        SelectedChunkSkipList(stripeSkipList, projectedColumnMask,

@@ -551,8 +560,8 @@ LoadColumnBuffers(Relation relation, ColumnChunkSkipNode *chunkSkipNodeArray,
 * the chunk can be refuted by the given qualifier conditions.
 */
 static bool *
-SelectedChunkMask(StripeSkipList *stripeSkipList, List *projectedColumnList,
-                 List *whereClauseList, int64 *chunkGroupsFiltered)
+SelectedChunkMask(StripeSkipList *stripeSkipList, List *whereClauseList,
+                 List *whereClauseVars, int64 *chunkGroupsFiltered)
 {
    ListCell *columnCell = NULL;
    uint32 chunkIndex = 0;

@@ -561,7 +570,7 @@ SelectedChunkMask(StripeSkipList *stripeSkipList, List *projectedColumnList,
    bool *selectedChunkMask = palloc0(stripeSkipList->chunkCount * sizeof(bool));
    memset(selectedChunkMask, true, stripeSkipList->chunkCount * sizeof(bool));

-   foreach(columnCell, projectedColumnList)
+   foreach(columnCell, whereClauseVars)
    {
        Var *column = lfirst(columnCell);
        uint32 columnIndex = column->varattno - 1;

@@ -693,6 +702,58 @@ BuildBaseConstraint(Var *variable)
 }


+/*
+ * GetClauseVars extracts the Vars from the given clauses for the purpose of
+ * building constraints that can be refuted by predicate_refuted_by(). It also
+ * deduplicates and sorts them.
+ */
+static List *
+GetClauseVars(List *whereClauseList, int natts)
+{
+   /*
+    * We don't recurse into or include aggregates, window functions, or
+    * PHVs. We don't expect any PHVs during execution; and Vars found inside
+    * an aggregate or window function aren't going to be useful in forming
+    * constraints that can be refuted.
+    */
+   int flags = 0;
+   List *vars = pull_var_clause((Node *) whereClauseList, flags);
+   Var **deduplicate = palloc0(sizeof(Var *) * natts);
+
+   ListCell *lc;
+   foreach(lc, vars)
+   {
+       Node *node = lfirst(lc);
+       Assert(IsA(node, Var));
+
+       Var *var = (Var *) node;
+       int idx = var->varattno - 1;
+
+       if (deduplicate[idx] != NULL)
+       {
+           /* if they have the same varattno, the rest should be identical */
+           Assert(equal(var, deduplicate[idx]));
+       }
+
+       deduplicate[idx] = var;
+   }
+
+   List *whereClauseVars = NIL;
+   for (int i = 0; i < natts; i++)
+   {
+       Var *var = deduplicate[i];
+       if (var != NULL)
+       {
+           whereClauseVars = lappend(whereClauseVars, var);
+       }
+   }
+
+   pfree(deduplicate);
+
+   return whereClauseVars;
+}
+
+
 /*
 * MakeOpExpression builds an operator expression node. This operator expression
 * implements the operator clause as defined by the variable and the strategy
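The reader changes above make chunk group elimination key off the Vars pulled from the WHERE clause (GetClauseVars) rather than the projected column list, so filtering still applies when the filtered column is not in the SELECT list. A hedged sketch with hypothetical table and column names:

```sql
-- Hypothetical columnar table; per-chunk-group min/max metadata lets the
-- reader refute chunk groups against the WHERE clause.
CREATE TABLE measurements (ts timestamptz, device_id int, reading float8)
    USING columnar;

-- device_id appears only in the WHERE clause, not in the target list;
-- the Vars extracted from the clause are what drive chunk group skipping.
SELECT avg(reading) FROM measurements WHERE device_id = 42;
```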
@@ -160,6 +160,8 @@ columnar_beginscan(Relation relation, Snapshot snapshot,
                   ParallelTableScanDesc parallel_scan,
                   uint32 flags)
 {
+   CheckCitusVersion(ERROR);
+
    int natts = relation->rd_att->natts;
    Bitmapset *attr_needed = NULL;

@@ -419,6 +421,8 @@ static bool
 columnar_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
                                  Snapshot snapshot)
 {
+   CheckCitusVersion(ERROR);
+
    return true;
 }

@@ -436,6 +440,8 @@ static void
 columnar_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
                      int options, BulkInsertState bistate)
 {
+   CheckCitusVersion(ERROR);
+
    /*
     * columnar_init_write_state allocates the write state in a longer
     * lasting context, so no need to worry about it.

@@ -481,6 +487,8 @@ static void
 columnar_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                      CommandId cid, int options, BulkInsertState bistate)
 {
+   CheckCitusVersion(ERROR);
+
    TableWriteState *writeState = columnar_init_write_state(relation,
                                                            RelationGetDescr(relation),
                                                            GetCurrentSubTransactionId());

@@ -552,6 +560,8 @@ columnar_relation_set_new_filenode(Relation rel,
                                   TransactionId *freezeXid,
                                   MultiXactId *minmulti)
 {
+   CheckCitusVersion(ERROR);
+
    if (persistence != RELPERSISTENCE_PERMANENT)
    {
        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

@@ -581,6 +591,8 @@ columnar_relation_set_new_filenode(Relation rel,
 static void
 columnar_relation_nontransactional_truncate(Relation rel)
 {
+   CheckCitusVersion(ERROR);
+
    RelFileNode relfilenode = rel->rd_node;

    NonTransactionDropWriteState(relfilenode.relNode);

@@ -625,6 +637,8 @@ columnar_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
                                   double *tups_vacuumed,
                                   double *tups_recently_dead)
 {
+   CheckCitusVersion(ERROR);
+
    TupleDesc sourceDesc = RelationGetDescr(OldHeap);
    TupleDesc targetDesc = RelationGetDescr(NewHeap);

@@ -670,6 +684,27 @@ columnar_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 }


+/*
+ * ColumnarTableTupleCount returns the number of tuples that columnar
+ * table with relationId has by using stripe metadata.
+ */
+static uint64
+ColumnarTableTupleCount(Relation relation)
+{
+   List *stripeList = StripesForRelfilenode(relation->rd_node);
+   uint64 tupleCount = 0;
+
+   ListCell *lc = NULL;
+   foreach(lc, stripeList)
+   {
+       StripeMetadata *stripe = lfirst(lc);
+       tupleCount += stripe->rowCount;
+   }
+
+   return tupleCount;
+}
+
+
 /*
 * columnar_vacuum_rel implements VACUUM without FULL option.
 */

@@ -677,6 +712,18 @@ static void
 columnar_vacuum_rel(Relation rel, VacuumParams *params,
                    BufferAccessStrategy bstrategy)
 {
+   if (!CheckCitusVersion(WARNING))
+   {
+       /*
+        * Skip if the extension catalogs are not up-to-date, but avoid
+        * erroring during auto-vacuum.
+        */
+       return;
+   }
+
+   pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
+                                 RelationGetRelid(rel));
+
    int elevel = (params->options & VACOPT_VERBOSE) ? INFO : DEBUG2;

    /* this should have been resolved by vacuum.c until now */

@@ -692,6 +739,52 @@ columnar_vacuum_rel(Relation rel, VacuumParams *params,
    {
        TruncateColumnar(rel, elevel);
    }
+
+   RelationOpenSmgr(rel);
+   BlockNumber new_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
+
+   /* get the number of indexes */
+   List *indexList = RelationGetIndexList(rel);
+   int nindexes = list_length(indexList);
+
+   TransactionId oldestXmin;
+   TransactionId freezeLimit;
+   MultiXactId multiXactCutoff;
+
+   /* initialize xids */
+   TransactionId xidFullScanLimit;
+   MultiXactId mxactFullScanLimit;
+   vacuum_set_xid_limits(rel,
+                         params->freeze_min_age,
+                         params->freeze_table_age,
+                         params->multixact_freeze_min_age,
+                         params->multixact_freeze_table_age,
+                         &oldestXmin, &freezeLimit, &xidFullScanLimit,
+                         &multiXactCutoff, &mxactFullScanLimit);
+
+   Assert(TransactionIdPrecedesOrEquals(freezeLimit, oldestXmin));
+
+   /*
+    * Columnar storage doesn't hold any transaction IDs, so we can always
+    * just advance to the most aggressive value.
+    */
+   TransactionId newRelFrozenXid = oldestXmin;
+   MultiXactId newRelminMxid = multiXactCutoff;
+
+   double new_live_tuples = ColumnarTableTupleCount(rel);
+
+   /* all visible pages are always 0 */
+   BlockNumber new_rel_allvisible = 0;
+
+   vac_update_relstats(rel, new_rel_pages, new_live_tuples,
+                       new_rel_allvisible, nindexes > 0,
+                       newRelFrozenXid, newRelminMxid, false);
+
+   pgstat_report_vacuum(RelationGetRelid(rel),
+                        rel->rd_rel->relisshared,
+                        Max(new_live_tuples, 0),
+                        0);
+   pgstat_progress_end_command();
 }
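The vacuum hook above now reports page/tuple counts and advances relfrozenxid/relminmxid for columnar tables, which is the freezing/wraparound fix (#5962) from the changelog. A rough way to observe it, with a hypothetical table:

```sql
-- Hypothetical columnar table used to observe the effect of the fix.
CREATE TABLE events (id bigint, payload text) USING columnar;
INSERT INTO events SELECT i, 'x' FROM generate_series(1, 1000) i;

VACUUM events;

-- After the fix, VACUUM advances these horizons for columnar tables
-- instead of leaving them behind until wraparound warnings appear.
SELECT relfrozenxid, relminmxid FROM pg_class WHERE relname = 'events';
```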
@@ -1006,6 +1099,8 @@ columnar_index_validate_scan(Relation heapRelation,
 static uint64
 columnar_relation_size(Relation rel, ForkNumber forkNumber)
 {
+   CheckCitusVersion(ERROR);
+
    uint64 nblocks = 0;

    /* Open it at the smgr level if not already done */

@@ -1031,6 +1126,8 @@ columnar_relation_size(Relation rel, ForkNumber forkNumber)
 static bool
 columnar_relation_needs_toast_table(Relation rel)
 {
+   CheckCitusVersion(ERROR);
+
    return false;
 }

@@ -1040,6 +1137,8 @@ columnar_estimate_rel_size(Relation rel, int32 *attr_widths,
                           BlockNumber *pages, double *tuples,
                           double *allvisfrac)
 {
+   CheckCitusVersion(ERROR);
+
    RelationOpenSmgr(rel);
    *pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
    *tuples = ColumnarTableRowCount(rel);

@@ -1218,6 +1317,8 @@ ColumnarTableDropHook(Oid relid)

    if (IsColumnarTableAmTable(relid))
    {
+       CheckCitusVersion(ERROR);
+
        /*
         * Drop metadata. No need to drop storage here since for
         * tableam tables storage is managed by postgres.

@@ -1653,6 +1754,8 @@ PG_FUNCTION_INFO_V1(alter_columnar_table_set);
 Datum
 alter_columnar_table_set(PG_FUNCTION_ARGS)
 {
+   CheckCitusVersion(ERROR);
+
    Oid relationId = PG_GETARG_OID(0);

    Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */

@@ -1662,6 +1765,8 @@ alter_columnar_table_set(PG_FUNCTION_ARGS)
                        quote_identifier(RelationGetRelationName(rel)))));
    }

+   EnsureTableOwner(relationId);
+
    ColumnarOptions options = { 0 };
    if (!ReadColumnarOptions(relationId, &options))
    {

@@ -1760,6 +1865,8 @@ PG_FUNCTION_INFO_V1(alter_columnar_table_reset);
 Datum
 alter_columnar_table_reset(PG_FUNCTION_ARGS)
 {
+   CheckCitusVersion(ERROR);
+
    Oid relationId = PG_GETARG_OID(0);

    Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */

@@ -1769,6 +1876,8 @@ alter_columnar_table_reset(PG_FUNCTION_ARGS)
                        quote_identifier(RelationGetRelationName(rel)))));
    }

+   EnsureTableOwner(relationId);
+
    ColumnarOptions options = { 0 };
    if (!ReadColumnarOptions(relationId, &options))
    {
@@ -0,0 +1,5 @@
+/* columnar--10.0-1--10.0-2.sql */
+
+-- grant read access for columnar metadata tables to unprivileged user
+GRANT USAGE ON SCHEMA columnar TO PUBLIC;
+GRANT SELECT ON ALL tables IN SCHEMA columnar TO PUBLIC;
@@ -0,0 +1,5 @@
+/* columnar--10.0-2--10.0-1.sql */
+
+-- revoke read access for columnar metadata tables from unprivileged user
+REVOKE USAGE ON SCHEMA columnar FROM PUBLIC;
+REVOKE SELECT ON ALL tables IN SCHEMA columnar FROM PUBLIC;
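The upgrade script above opens the columnar metadata schema to PUBLIC, and the downgrade script reverts it. A hedged sketch of what that enables, assuming a hypothetical unprivileged role and assuming columnar.options is one of the metadata tables in that schema:

```sql
-- Hypothetical unprivileged role.
CREATE ROLE reporting LOGIN;
SET ROLE reporting;

-- Readable because of the GRANT USAGE / GRANT SELECT statements above;
-- columnar.options is assumed to be one of the columnar metadata tables.
SELECT * FROM columnar.options;

RESET ROLE;
```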
@@ -1,6 +1,6 @@
 # Citus extension
 comment = 'Citus distributed database'
-default_version = '10.0-1'
+default_version = '10.0-4'
 module_pathname = '$libdir/citus'
 relocatable = false
 schema = pg_catalog
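Bumping default_version to '10.0-4' means a plain extension update walks through the new migration scripts (including the columnar GRANT script above). A minimal sketch:

```sql
-- Move an existing installation to the new default_version from citus.control.
ALTER EXTENSION citus UPDATE;

-- Confirm the installed extension version.
SELECT extversion FROM pg_extension WHERE extname = 'citus';
```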
@@ -29,9 +29,12 @@
 #include "fmgr.h"

 #include "access/hash.h"
+#include "access/htup_details.h"
 #include "access/xact.h"
 #include "catalog/dependency.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_rewrite_d.h"
 #include "columnar/columnar.h"
 #include "columnar/columnar_tableam.h"
 #include "distributed/colocation_utils.h"

@@ -43,12 +46,15 @@
 #include "distributed/listutils.h"
 #include "distributed/local_executor.h"
 #include "distributed/metadata/dependency.h"
+#include "distributed/metadata/distobject.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/metadata_sync.h"
 #include "distributed/multi_executor.h"
 #include "distributed/multi_logical_planner.h"
 #include "distributed/multi_partitioning_utils.h"
 #include "distributed/reference_table_utils.h"
+#include "distributed/relation_access_tracking.h"
+#include "distributed/shard_utils.h"
 #include "distributed/worker_protocol.h"
 #include "distributed/worker_transaction.h"
 #include "executor/spi.h"

@@ -175,6 +181,8 @@ static TableConversionReturn * AlterDistributedTable(TableConversionParameters *
 static TableConversionReturn * AlterTableSetAccessMethod(
    TableConversionParameters *params);
 static TableConversionReturn * ConvertTable(TableConversionState *con);
+static bool SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName,
+                                                                  char *longestShardName);
 static void EnsureTableNotReferencing(Oid relationId, char conversionType);
 static void EnsureTableNotReferenced(Oid relationId, char conversionType);
 static void EnsureTableNotForeign(Oid relationId);

@@ -198,6 +206,8 @@ static bool WillRecreateForeignKeyToReferenceTable(Oid relationId,
                                                    CascadeToColocatedOption cascadeOption);
 static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId);
 static void ExecuteQueryViaSPI(char *query, int SPIOK);
+static void ErrorIfUnsupportedCascadeObjects(Oid relationId);
+static bool DoesCascadeDropUnsupportedObject(Oid classId, Oid id, HTAB *nodeMap);

 PG_FUNCTION_INFO_V1(undistribute_table);
 PG_FUNCTION_INFO_V1(alter_distributed_table);

@@ -375,6 +385,8 @@ UndistributeTable(TableConversionParameters *params)
        ErrorIfAnyPartitionRelationInvolvedInNonInheritedFKey(partitionList);
    }

+   ErrorIfUnsupportedCascadeObjects(params->relationId);
+
    params->conversionType = UNDISTRIBUTE_TABLE;
    params->shardCountIsNull = true;
    TableConversionState *con = CreateTableConversion(params);

@@ -406,6 +418,8 @@ AlterDistributedTable(TableConversionParameters *params)
    EnsureTableNotPartition(params->relationId);
    EnsureHashDistributedTable(params->relationId);

+   ErrorIfUnsupportedCascadeObjects(params->relationId);
+
    params->conversionType = ALTER_DISTRIBUTED_TABLE;
    TableConversionState *con = CreateTableConversion(params);
    CheckAlterDistributedTableConversionParameters(con);

@@ -467,6 +481,8 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
        }
    }

+   ErrorIfUnsupportedCascadeObjects(params->relationId);
+
    params->conversionType = ALTER_TABLE_SET_ACCESS_METHOD;
    params->shardCountIsNull = true;
    TableConversionState *con = CreateTableConversion(params);

@@ -511,6 +527,10 @@ ConvertTable(TableConversionState *con)
    bool oldEnableLocalReferenceForeignKeys = EnableLocalReferenceForeignKeys;
    SetLocalEnableLocalReferenceForeignKeys(false);

+   /* switch to sequential execution if shard names will be too long */
+   SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(con->relationId,
+                                                            con->relationName);
+
    if (con->conversionType == UNDISTRIBUTE_TABLE && con->cascadeViaForeignKeys &&
        (TableReferencing(con->relationId) || TableReferenced(con->relationId)))
    {

@@ -673,7 +693,7 @@ ConvertTable(TableConversionState *con)
        Node *parseTree = ParseTreeNode(tableCreationSql);

        RelayEventExtendNames(parseTree, con->schemaName, con->hashOfName);
-       ProcessUtilityParseTree(parseTree, tableCreationSql, PROCESS_UTILITY_TOPLEVEL,
+       ProcessUtilityParseTree(parseTree, tableCreationSql, PROCESS_UTILITY_QUERY,
                                NULL, None_Receiver, NULL);
    }

@@ -711,6 +731,32 @@ ConvertTable(TableConversionState *con)
        CreateCitusTableLike(con);
    }

+   /* preserve colocation with procedures/functions */
+   if (con->conversionType == ALTER_DISTRIBUTED_TABLE)
+   {
+       /*
+        * Updating the colocationId of functions is always desirable for
+        * the following scenario:
+        * we have shardCount or colocateWith change
+        * AND entire co-location group is altered
+        * The reason for the second condition is because we currently don't
+        * remember the original table specified in the colocateWith when
+        * distributing the function. We only remember the colocationId in
+        * pg_dist_object table.
+        */
+       if ((!con->shardCountIsNull || con->colocateWith != NULL) &&
+           (con->cascadeToColocated == CASCADE_TO_COLOCATED_YES || list_length(
+                con->colocatedTableList) == 1) && con->distributionColumn == NULL)
+       {
+           /*
+            * Update the colocationId from the one of the old relation to the one
+            * of the new relation for all tuples in citus.pg_dist_object
+            */
+           UpdateDistributedObjectColocationId(TableColocationId(con->relationId),
+                                               TableColocationId(con->newRelationId));
+       }
+   }
+
    ReplaceTable(con->relationId, con->newRelationId, justBeforeDropCommands,
                 con->suppressNoticeMessages);
@ -728,7 +774,7 @@ ConvertTable(TableConversionState *con)
|
||||||
Node *parseTree = ParseTreeNode(attachPartitionCommand);
|
Node *parseTree = ParseTreeNode(attachPartitionCommand);
|
||||||
|
|
||||||
ProcessUtilityParseTree(parseTree, attachPartitionCommand,
|
ProcessUtilityParseTree(parseTree, attachPartitionCommand,
|
||||||
PROCESS_UTILITY_TOPLEVEL,
|
PROCESS_UTILITY_QUERY,
|
||||||
NULL, None_Receiver, NULL);
|
NULL, None_Receiver, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1042,6 +1088,30 @@ CreateDistributedTableLike(TableConversionState *con)
 	{
 		newShardCount = con->shardCount;
 	}

+	Oid originalRelationId = con->relationId;
+	if (con->originalDistributionKey != NULL && PartitionTable(originalRelationId))
+	{
+		/*
+		 * Due to dropped columns, the partition tables might have different
+		 * distribution keys than their parents, see issue #5123 for details.
+		 *
+		 * At this point, we get the partitioning information from the
+		 * originalRelationId, but we get the distribution key for newRelationId.
+		 *
+		 * We have to do this, because the newRelationId is just a placeholder
+		 * at this moment, but that's going to be the table in pg_dist_partition.
+		 */
+		Oid parentRelationId = PartitionParentOid(originalRelationId);
+		Var *parentDistKey = DistPartitionKey(parentRelationId);
+		char *parentDistKeyColumnName =
+			ColumnToColumnName(parentRelationId, nodeToString(parentDistKey));
+
+		newDistributionKey =
+			FindColumnWithNameOnTargetRelation(parentRelationId, parentDistKeyColumnName,
+											   con->newRelationId);
+	}
+
 	char partitionMethod = PartitionMethod(con->relationId);
 	CreateDistributedTable(con->newRelationId, newDistributionKey, partitionMethod,
 						   newShardCount, newColocateWith, false);
@@ -1077,6 +1147,94 @@ CreateCitusTableLike(TableConversionState *con)
 }


+/*
+ * ErrorIfUnsupportedCascadeObjects gets oid of a relation, finds the objects
+ * that dropping this relation cascades into and errors if there are any extensions
+ * that would be dropped.
+ */
+static void
+ErrorIfUnsupportedCascadeObjects(Oid relationId)
+{
+	HASHCTL info;
+	memset(&info, 0, sizeof(info));
+	info.keysize = sizeof(Oid);
+	info.entrysize = sizeof(Oid);
+	info.hash = oid_hash;
+	uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION);
+	HTAB *nodeMap = hash_create("object dependency map (oid)", 64, &info, hashFlags);
+
+	bool unsupportedObjectInDepGraph =
+		DoesCascadeDropUnsupportedObject(RelationRelationId, relationId, nodeMap);
+
+	if (unsupportedObjectInDepGraph)
+	{
+		ereport(ERROR, (errmsg("cannot alter table because an extension depends on it")));
+	}
+}
+
+
+/*
+ * DoesCascadeDropUnsupportedObject walks through the objects that depend on the
+ * object with object id and returns true if it finds any unsupported objects.
+ *
+ * This function only checks extensions as unsupported objects.
+ *
+ * Extension dependency is different than the rest. If an object depends on an extension
+ * dropping the object would drop the extension too.
+ * So we check with IsObjectAddressOwnedByExtension function.
+ */
+static bool
+DoesCascadeDropUnsupportedObject(Oid classId, Oid objectId, HTAB *nodeMap)
+{
+	bool found = false;
+	hash_search(nodeMap, &objectId, HASH_ENTER, &found);
+
+	if (found)
+	{
+		return false;
+	}
+
+	ObjectAddress objectAddress = { 0 };
+	ObjectAddressSet(objectAddress, classId, objectId);
+
+	if (IsObjectAddressOwnedByExtension(&objectAddress, NULL))
+	{
+		return true;
+	}
+
+	Oid targetObjectClassId = classId;
+	Oid targetObjectId = objectId;
+	List *dependencyTupleList = GetPgDependTuplesForDependingObjects(targetObjectClassId,
+																	 targetObjectId);
+
+	HeapTuple depTup = NULL;
+	foreach_ptr(depTup, dependencyTupleList)
+	{
+		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
+
+		Oid dependingOid = InvalidOid;
+		Oid dependingClassId = InvalidOid;
+
+		if (pg_depend->classid == RewriteRelationId)
+		{
+			dependingOid = GetDependingView(pg_depend);
+			dependingClassId = RelationRelationId;
+		}
+		else
+		{
+			dependingOid = pg_depend->objid;
+			dependingClassId = pg_depend->classid;
+		}
+
+		if (DoesCascadeDropUnsupportedObject(dependingClassId, dependingOid, nodeMap))
+		{
+			return true;
+		}
+	}
+	return false;
+}
+
+
 /*
  * GetViewCreationCommandsOfTable takes a table oid generates the CREATE VIEW
  * commands for views that depend to the given table. This includes the views
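The recursion above only terminates safely because every visited OID is entered into nodeMap before the walk continues. The following standalone sketch (an editor's illustration, not part of this diff; every name in it is hypothetical) shows the same visited-set technique on a toy dependency graph, where the "extension-owned" flag plays the role of IsObjectAddressOwnedByExtension:

#include <stdbool.h>
#include <stdio.h>

#define MAX_OBJECTS 16

/* toy dependency graph: dependsOn[a][b] is true when object b depends on object a */
static bool dependsOn[MAX_OBJECTS][MAX_OBJECTS];
static bool ownedByExtension[MAX_OBJECTS];

/*
 * Depth-first walk over the objects that depend on objectId. The visited
 * array plays the role of the nodeMap hash in the diff: it stops the
 * recursion from looping when the graph contains cycles or shared nodes.
 */
static bool
CascadesIntoExtension(int objectId, bool visited[MAX_OBJECTS])
{
    if (visited[objectId])
    {
        return false;
    }
    visited[objectId] = true;

    if (ownedByExtension[objectId])
    {
        return true;
    }

    for (int depender = 0; depender < MAX_OBJECTS; depender++)
    {
        if (dependsOn[objectId][depender] &&
            CascadesIntoExtension(depender, visited))
        {
            return true;
        }
    }

    return false;
}

int
main(void)
{
    /* object 2 depends on object 1, and object 2 is owned by an extension */
    dependsOn[1][2] = true;
    ownedByExtension[2] = true;

    bool visited[MAX_OBJECTS] = { false };
    printf("dropping object 1 cascades into an extension: %s\n",
           CascadesIntoExtension(1, visited) ? "yes" : "no");
    return 0;
}

Marking a node visited before descending, as both the sketch and the diff do, is what keeps the walk linear in the number of dependency edges.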
@@ -1134,7 +1292,7 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
 {
 	if (!suppressNoticeMessages)
 	{
-		ereport(NOTICE, (errmsg("Moving the data of %s",
+		ereport(NOTICE, (errmsg("moving the data of %s",
 								quote_qualified_identifier(schemaName, sourceName))));
 	}

@@ -1207,7 +1365,7 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,

 	if (!suppressNoticeMessages)
 	{
-		ereport(NOTICE, (errmsg("Dropping the old %s",
+		ereport(NOTICE, (errmsg("dropping the old %s",
 								quote_qualified_identifier(schemaName, sourceName))));
 	}

@@ -1218,7 +1376,7 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,

 	if (!suppressNoticeMessages)
 	{
-		ereport(NOTICE, (errmsg("Renaming the new table to %s",
+		ereport(NOTICE, (errmsg("renaming the new table to %s",
 								quote_qualified_identifier(schemaName, sourceName))));
 	}

@@ -1572,3 +1730,132 @@ ExecuteQueryViaSPI(char *query, int SPIOK)
 		ereport(ERROR, (errmsg("could not finish SPI connection")));
 	}
 }
+
+
+/*
+ * SwitchToSequentialAndLocalExecutionIfRelationNameTooLong generates the longest shard name
+ * on the shards of a distributed table, and if exceeds the limit switches to sequential and
+ * local execution to prevent self-deadlocks.
+ *
+ * In case of a RENAME, the relation name parameter should store the new table name, so
+ * that the function can generate shard names of the renamed relations
+ */
+void
+SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(Oid relationId,
+														 char *finalRelationName)
+{
+	if (!IsCitusTable(relationId))
+	{
+		return;
+	}
+
+	if (ShardIntervalCount(relationId) == 0)
+	{
+		/*
+		 * Relation has no shards, so we cannot run into "long shard relation
+		 * name" issue.
+		 */
+		return;
+	}
+
+	char *longestShardName = GetLongestShardName(relationId, finalRelationName);
+	bool switchedToSequentialAndLocalExecution =
+		SwitchToSequentialAndLocalExecutionIfShardNameTooLong(finalRelationName,
+															  longestShardName);
+
+	if (switchedToSequentialAndLocalExecution)
+	{
+		return;
+	}
+
+	if (PartitionedTable(relationId))
+	{
+		Oid longestNamePartitionId = PartitionWithLongestNameRelationId(relationId);
+		if (!OidIsValid(longestNamePartitionId))
+		{
+			/* no partitions have been created yet */
+			return;
+		}
+
+		char *longestPartitionName = get_rel_name(longestNamePartitionId);
+		char *longestPartitionShardName = NULL;
+
+		/*
+		 * Use the shardId values of the partition if it is distributed, otherwise use
+		 * hypothetical values
+		 */
+		if (IsCitusTable(longestNamePartitionId) &&
+			ShardIntervalCount(longestNamePartitionId) > 0)
+		{
+			longestPartitionShardName =
+				GetLongestShardName(longestNamePartitionId, longestPartitionName);
+		}
+		else
+		{
+			longestPartitionShardName =
+				GetLongestShardNameForLocalPartition(relationId, longestPartitionName);
+		}
+
+		SwitchToSequentialAndLocalExecutionIfShardNameTooLong(longestPartitionName,
+															  longestPartitionShardName);
+	}
+}
+
+
+/*
+ * SwitchToSequentialAndLocalExecutionIfShardNameTooLong switches to sequential and local
+ * execution if the shard name is too long.
+ *
+ * returns true if switched to sequential and local execution.
+ */
+static bool
+SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName,
+													  char *longestShardName)
+{
+	if (strlen(longestShardName) >= NAMEDATALEN - 1)
+	{
+		if (ParallelQueryExecutedInTransaction())
+		{
+			/*
+			 * If there has already been a parallel query executed, the sequential mode
+			 * would still use the already opened parallel connections to the workers,
+			 * thus contradicting our purpose of using sequential mode.
+			 */
+			ereport(ERROR, (errmsg(
+								"Shard name (%s) for table (%s) is too long and could "
+								"lead to deadlocks when executed in a transaction "
+								"block after a parallel query", longestShardName,
+								relationName),
+							errhint("Try re-running the transaction with "
+									"\"SET LOCAL citus.multi_shard_modify_mode TO "
+									"\'sequential\';\"")));
+		}
+		else
+		{
+			elog(DEBUG1, "the name of the shard (%s) for relation (%s) is too long, "
+						 "switching to sequential and local execution mode to prevent "
+						 "self deadlocks",
+				 longestShardName, relationName);
+
+			SetLocalMultiShardModifyModeToSequential();
+			SetLocalExecutionStatus(LOCAL_EXECUTION_REQUIRED);
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
+/*
+ * SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong is a wrapper for new
+ * partitions that will be distributed after attaching to a distributed partitioned table
+ */
+void
+SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(Oid parentRelationId,
+														  Oid partitionRelationId)
+{
+	SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(
+		parentRelationId, get_rel_name(partitionRelationId));
+}
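The NAMEDATALEN - 1 check above matters because PostgreSQL silently truncates identifiers longer than 63 bytes, so two distinct long shard names can collapse into the same truncated name and the parallel DDL can deadlock against itself. A minimal standalone sketch (not Citus code; the exact name composition shown here is an assumption for illustration only) of how appending a shard id pushes a long relation name over that limit:

#include <stdio.h>
#include <string.h>

#define NAMEDATALEN 64  /* PostgreSQL's identifier limit is NAMEDATALEN - 1 bytes */

int
main(void)
{
    /* hypothetical long table name and shard id */
    const char *relationName =
        "a_very_long_distributed_table_name_that_is_about_sixty_chars";
    long shardId = 102008;

    char shardName[256];
    snprintf(shardName, sizeof(shardName), "%s_%ld", relationName, shardId);

    /* mirrors the strlen(...) >= NAMEDATALEN - 1 condition used in the diff */
    if (strlen(shardName) >= NAMEDATALEN - 1)
    {
        printf("shard name %s (%zu bytes) would be truncated by PostgreSQL\n",
               shardName, strlen(shardName));
    }
    return 0;
}

Switching to sequential and local execution in that case avoids holding conflicting locks on several truncated-to-identical shard names at once.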
@@ -510,6 +510,6 @@ ExecuteForeignKeyCreateCommand(const char *commandString, bool skip_validation)
 						"command \"%s\"", commandString)));
 	}

-	ProcessUtilityParseTree(parseTree, commandString, PROCESS_UTILITY_TOPLEVEL,
+	ProcessUtilityParseTree(parseTree, commandString, PROCESS_UTILITY_QUERY,
 							NULL, None_Receiver, NULL);
 }
@@ -412,6 +412,24 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio
 	char replicationModel = DecideReplicationModel(distributionMethod,
 												   viaDeprecatedAPI);

+
+	/*
+	 * Due to dropping columns, the parent's distribution key may not match the
+	 * partition's distribution key. The input distributionColumn belongs to
+	 * the parent. That's why we override the distribution column of partitions
+	 * here. See issue #5123 for details.
+	 */
+	if (PartitionTable(relationId))
+	{
+		Oid parentRelationId = PartitionParentOid(relationId);
+		char *distributionColumnName =
+			ColumnToColumnName(parentRelationId, nodeToString(distributionColumn));
+
+		distributionColumn =
+			FindColumnWithNameOnTargetRelation(parentRelationId, distributionColumnName,
+											   relationId);
+	}
+
 	/*
 	 * ColocationIdForNewTable assumes caller acquires lock on relationId. In our case,
 	 * our caller already acquired lock on relationId.
@@ -411,15 +411,16 @@ static char *
 GenerateLongestShardPartitionIndexName(IndexStmt *createIndexStatement)
 {
 	Oid relationId = CreateIndexStmtGetRelationId(createIndexStatement);
-	char *longestPartitionName = LongestPartitionName(relationId);
-	if (longestPartitionName == NULL)
+	Oid longestNamePartitionId = PartitionWithLongestNameRelationId(relationId);
+	if (!OidIsValid(longestNamePartitionId))
 	{
 		/* no partitions have been created yet */
 		return NULL;
 	}

-	char *longestPartitionShardName = pstrdup(longestPartitionName);
-	ShardInterval *shardInterval = LoadShardIntervalWithLongestShardName(relationId);
+	char *longestPartitionShardName = get_rel_name(longestNamePartitionId);
+	ShardInterval *shardInterval = LoadShardIntervalWithLongestShardName(
+		longestNamePartitionId);
 	AppendShardIdToName(&longestPartitionShardName, shardInterval->shardId);

 	IndexStmt *createLongestShardIndexStmt = copyObject(createIndexStatement);
@@ -2244,7 +2244,7 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation,
 	if (cacheEntry->replicationModel == REPLICATION_MODEL_2PC ||
 		MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
 	{
-		CoordinatedTransactionUse2PC();
+		CoordinatedTransactionShouldUse2PC();
 	}

 	/* define how tuples will be serialised */
@@ -109,6 +109,13 @@ PreprocessRenameStmt(Node *node, const char *renameCommand,
 	 */
 	ErrorIfUnsupportedRenameStmt(renameStmt);

+	if (renameStmt->renameType == OBJECT_TABLE ||
+		renameStmt->renameType == OBJECT_FOREIGN_TABLE)
+	{
+		SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(tableRelationId,
+																 renameStmt->newname);
+	}
+
 	DDLJob *ddlJob = palloc0(sizeof(DDLJob));
 	ddlJob->targetRelationId = tableRelationId;
 	ddlJob->concurrentIndexCmd = false;
@@ -26,6 +26,7 @@
 #include "distributed/commands/utility_hook.h"
 #include "distributed/deparser.h"
 #include "distributed/deparse_shard_query.h"
+#include "distributed/distribution_column.h"
 #include "distributed/listutils.h"
 #include "distributed/coordinator_protocol.h"
 #include "distributed/metadata_sync.h"
@@ -324,6 +325,9 @@ PostprocessCreateTableStmtPartitionOf(CreateStmt *createStatement, const
 	char *parentRelationName = generate_qualified_relation_name(parentRelationId);
 	bool viaDeprecatedAPI = false;

+	SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(parentRelationId,
+															  relationId);
+
 	CreateDistributedTable(relationId, parentDistributionColumn,
 						   parentDistributionMethod, ShardCount,
 						   parentRelationName, viaDeprecatedAPI);
@@ -398,6 +402,9 @@ PostprocessAlterTableStmtAttachPartition(AlterTableStmt *alterTableStatement,
 	char *parentRelationName = generate_qualified_relation_name(relationId);
 	bool viaDeprecatedAPI = false;

+	SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(
+		relationId, partitionRelationId);
+
 	CreateDistributedTable(partitionRelationId, distributionColumn,
 						   distributionMethod, ShardCount,
 						   parentRelationName, viaDeprecatedAPI);
@@ -910,6 +910,26 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
 	 */
 	if (ddlJob->startNewTransaction)
 	{
+		/*
+		 * If cache is not populated, system catalog lookups will cause
+		 * the xmin of current backend to change. Then the last phase
+		 * of CREATE INDEX CONCURRENTLY, which is in a separate backend,
+		 * will hang waiting for our backend and result in a deadlock.
+		 *
+		 * We populate the cache before starting the next transaction to
+		 * avoid this. Most of the metadata has already been resolved in
+		 * planning phase, we only need to lookup metadata needed for
+		 * connection establishment.
+		 */
+		(void) CurrentDatabaseName();
+
+		/*
+		 * ConnParams (AuthInfo and PoolInfo) gets a snapshot, which
+		 * will blocks the remote connections to localhost. Hence we warm up
+		 * the cache here so that after we start a new transaction, the entries
+		 * will already be in the hash table, hence we won't be holding any snapshots.
+		 */
+		WarmUpConnParamsHash();
 		CommitTransactionCommand();
 		StartTransactionCommand();
 	}
@@ -21,6 +21,7 @@

 /* stores the string representation of our node connection GUC */
 char *NodeConninfo = "";
+char *LocalHostName = "localhost";

 /* represents a list of libpq parameter settings */
 typedef struct ConnParamsInfo
@@ -32,6 +32,7 @@
 #include "distributed/shared_connection_stats.h"
 #include "distributed/cancel_utils.h"
 #include "distributed/remote_commands.h"
+#include "distributed/time_constants.h"
 #include "distributed/version_compat.h"
 #include "distributed/worker_log_messages.h"
 #include "mb/pg_wchar.h"
@@ -43,6 +44,7 @@

 int NodeConnectionTimeout = 30000;
 int MaxCachedConnectionsPerWorker = 1;
+int MaxCachedConnectionLifetime = 10 * MS_PER_MINUTE;

 HTAB *ConnectionHash = NULL;
 HTAB *ConnParamsHash = NULL;
@@ -85,6 +87,7 @@ static WaitEventSet * WaitEventSetFromMultiConnectionStates(List *connections,
 static void CloseNotReadyMultiConnectionStates(List *connectionStates);
 static uint32 MultiConnectionStateEventMask(MultiConnectionPollState *connectionState);
 static void CitusPQFinish(MultiConnection *connection);
+static ConnParamsHashEntry * FindOrCreateConnParamsEntry(ConnectionHashKey *key);

 /*
  * Initialize per-backend connection management infrastructure.
@@ -1127,9 +1130,62 @@ ConnectionHashCompare(const void *a, const void *b, Size keysize)
 static void
 StartConnectionEstablishment(MultiConnection *connection, ConnectionHashKey *key)
 {
-	bool found = false;
 	static uint64 connectionId = 1;

+	ConnParamsHashEntry *entry = FindOrCreateConnParamsEntry(key);
+
+	strlcpy(connection->hostname, key->hostname, MAX_NODE_LENGTH);
+	connection->port = key->port;
+	strlcpy(connection->database, key->database, NAMEDATALEN);
+	strlcpy(connection->user, key->user, NAMEDATALEN);
+
+	connection->pgConn = PQconnectStartParams((const char **) entry->keywords,
+											  (const char **) entry->values,
+											  false);
+	connection->connectionStart = GetCurrentTimestamp();
+	connection->connectionId = connectionId++;
+
+	/*
+	 * To avoid issues with interrupts not getting caught all our connections
+	 * are managed in a non-blocking manner. remote_commands.c provides
+	 * wrappers emulating blocking behaviour.
+	 */
+	PQsetnonblocking(connection->pgConn, true);
+
+	SetCitusNoticeReceiver(connection);
+}
+
+
+/*
+ * WarmUpConnParamsHash warms up the ConnParamsHash by loading all the
+ * conn params for active primary nodes.
+ */
+void
+WarmUpConnParamsHash(void)
+{
+	List *workerNodeList = ActivePrimaryNodeList(AccessShareLock);
+	WorkerNode *workerNode = NULL;
+	foreach_ptr(workerNode, workerNodeList)
+	{
+		ConnectionHashKey key;
+		strlcpy(key.hostname, workerNode->workerName, MAX_NODE_LENGTH);
+		key.port = workerNode->workerPort;
+		strlcpy(key.database, CurrentDatabaseName(), NAMEDATALEN);
+		strlcpy(key.user, CurrentUserName(), NAMEDATALEN);
+		FindOrCreateConnParamsEntry(&key);
+	}
+}
+
+
+/*
+ * FindOrCreateConnParamsEntry searches ConnParamsHash for the given key,
+ * if it is not found, it is created.
+ */
+static ConnParamsHashEntry *
+FindOrCreateConnParamsEntry(ConnectionHashKey *key)
+{
+	bool found = false;
+
 	/* search our cache for precomputed connection settings */
 	ConnParamsHashEntry *entry = hash_search(ConnParamsHash, key, HASH_ENTER, &found);
 	if (!found || !entry->isValid)
@@ -1157,25 +1213,7 @@ StartConnectionEstablishment(MultiConnection *connection, ConnectionHashKey *key
 		entry->isValid = true;
 	}

-	strlcpy(connection->hostname, key->hostname, MAX_NODE_LENGTH);
-	connection->port = key->port;
-	strlcpy(connection->database, key->database, NAMEDATALEN);
-	strlcpy(connection->user, key->user, NAMEDATALEN);
-
-	connection->pgConn = PQconnectStartParams((const char **) entry->keywords,
-											  (const char **) entry->values,
-											  false);
-	connection->connectionStart = GetCurrentTimestamp();
-	connection->connectionId = connectionId++;
-
-	/*
-	 * To avoid issues with interrupts not getting caught all our connections
-	 * are managed in a non-blocking manner. remote_commands.c provides
-	 * wrappers emulating blocking behaviour.
-	 */
-	PQsetnonblocking(connection->pgConn, true);
-
-	SetCitusNoticeReceiver(connection);
+	return entry;
 }


@@ -1288,6 +1326,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit)
  * - Connection is forced to close at the end of transaction
  * - Connection is not in OK state
  * - A transaction is still in progress (usually because we are cancelling a distributed transaction)
+ * - A connection reached its maximum lifetime
  */
 static bool
 ShouldShutdownConnection(MultiConnection *connection, const int cachedConnectionCount)
@@ -1303,7 +1342,10 @@ ShouldShutdownConnection(MultiConnection *connection, const int cachedConnection
 		   cachedConnectionCount >= MaxCachedConnectionsPerWorker ||
 		   connection->forceCloseAtTransactionEnd ||
 		   PQstatus(connection->pgConn) != CONNECTION_OK ||
-		   !RemoteTransactionIdle(connection);
+		   !RemoteTransactionIdle(connection) ||
+		   (MaxCachedConnectionLifetime >= 0 &&
+			TimestampDifferenceExceeds(connection->connectionStart, GetCurrentTimestamp(),
+									   MaxCachedConnectionLifetime));
 }

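The new MaxCachedConnectionLifetime term above closes cached worker connections once they have been open longer than the configured number of milliseconds, with a negative value disabling the check. A standalone sketch of the same predicate (an editor's illustration; the helper name and values here are hypothetical, not Citus code):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* illustrative stand-in for the lifetime setting, in milliseconds */
static int MaxCachedConnectionLifetime = 10 * 60 * 1000;

/*
 * Returns true when a cached connection is older than the configured
 * lifetime; a negative lifetime disables the check, matching the
 * ">= 0" guard in the diff.
 */
static bool
ConnectionTooOld(time_t connectionStart, time_t now)
{
    if (MaxCachedConnectionLifetime < 0)
    {
        return false;
    }

    double elapsedMs = difftime(now, connectionStart) * 1000.0;
    return elapsedMs >= MaxCachedConnectionLifetime;
}

int
main(void)
{
    time_t now = time(NULL);
    time_t openedElevenMinutesAgo = now - 11 * 60;

    printf("shutdown connection: %s\n",
           ConnectionTooOld(openedElevenMinutesAgo, now) ? "yes" : "no");
    return 0;
}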
@@ -25,7 +25,11 @@
 #include "utils/palloc.h"


-#define MAX_PUT_COPY_DATA_BUFFER_SIZE (8 * 1024 * 1024)
+/*
+ * Setting that controls how many bytes of COPY data libpq is allowed to buffer
+ * internally before we force a flush.
+ */
+int RemoteCopyFlushThreshold = 8 * 1024 * 1024;
+

 /* GUC, determining whether statements sent to remote nodes are logged */
@@ -620,7 +624,7 @@ PutRemoteCopyData(MultiConnection *connection, const char *buffer, int nbytes)
 	 */

 	connection->copyBytesWrittenSinceLastFlush += nbytes;
-	if (connection->copyBytesWrittenSinceLastFlush > MAX_PUT_COPY_DATA_BUFFER_SIZE)
+	if (connection->copyBytesWrittenSinceLastFlush > RemoteCopyFlushThreshold)
 	{
 		connection->copyBytesWrittenSinceLastFlush = 0;
 		return FinishConnectionIO(connection, allowInterrupts);
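Turning the fixed MAX_PUT_COPY_DATA_BUFFER_SIZE constant into the RemoteCopyFlushThreshold variable makes the flush point configurable while keeping the same accounting: add the bytes of each COPY chunk to a running counter and flush once it crosses the threshold. A standalone sketch of that accounting (an editor's illustration; all names here are hypothetical stand-ins, not the Citus API):

#include <stdbool.h>
#include <stdio.h>

/* illustrative stand-in for the configurable flush threshold, in bytes */
static int RemoteCopyFlushThreshold = 8 * 1024 * 1024;

static long bytesSinceLastFlush = 0;

/*
 * Returns true when the caller should flush: the counter is reset, just as
 * copyBytesWrittenSinceLastFlush is reset in the diff before the I/O is
 * pushed out.
 */
static bool
AccountCopyBytes(int nbytes)
{
    bytesSinceLastFlush += nbytes;
    if (bytesSinceLastFlush > RemoteCopyFlushThreshold)
    {
        bytesSinceLastFlush = 0;
        return true;
    }
    return false;
}

int
main(void)
{
    long flushes = 0;
    for (int chunk = 0; chunk < 1000; chunk++)
    {
        if (AccountCopyBytes(64 * 1024)) /* 64 kB COPY chunks */
        {
            flushes++;
        }
    }
    printf("flushed %ld times\n", flushes);
    return 0;
}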
@@ -7055,9 +7055,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
 			ExtractRangeTblExtraData(rte, NULL, &fragmentSchemaName, &fragmentTableName, NULL);

 			/* use schema and table name from the remote alias */
-			appendStringInfoString(buf,
-								   generate_fragment_name(fragmentSchemaName,
-														  fragmentTableName));
+			appendStringInfo(buf, "%s%s",
+							 only_marker(rte),
+							 generate_fragment_name(fragmentSchemaName,
+													fragmentTableName));
 			break;
 		}

@@ -7057,9 +7057,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
 			ExtractRangeTblExtraData(rte, NULL, &fragmentSchemaName, &fragmentTableName, NULL);

 			/* use schema and table name from the remote alias */
-			appendStringInfoString(buf,
-								   generate_fragment_name(fragmentSchemaName,
-														  fragmentTableName));
+			appendStringInfo(buf, "%s%s",
+							 only_marker(rte),
+							 generate_fragment_name(fragmentSchemaName,
+													fragmentTableName));
 			break;
 		}

@@ -7115,9 +7115,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
 			ExtractRangeTblExtraData(rte, NULL, &fragmentSchemaName, &fragmentTableName, NULL);

 			/* use schema and table name from the remote alias */
-			appendStringInfoString(buf,
-								   generate_fragment_name(fragmentSchemaName,
-														  fragmentTableName));
+			appendStringInfo(buf, "%s%s",
+							 only_marker(rte),
+							 generate_fragment_name(fragmentSchemaName,
+													fragmentTableName));
 			break;
 		}

@@ -174,6 +174,8 @@
 #include "utils/timestamp.h"

 #define SLOW_START_DISABLED 0
+#define WAIT_EVENT_SET_INDEX_NOT_INITIALIZED -1
+#define WAIT_EVENT_SET_INDEX_FAILED -2


 /*
@@ -638,6 +640,10 @@ static int UsableConnectionCount(WorkerPool *workerPool);
 static long NextEventTimeout(DistributedExecution *execution);
 static WaitEventSet * BuildWaitEventSet(List *sessionList);
 static void RebuildWaitEventSetFlags(WaitEventSet *waitEventSet, List *sessionList);
+static int CitusAddWaitEventSetToSet(WaitEventSet *set, uint32 events, pgsocket fd,
+									 Latch *latch, void *user_data);
+static bool CitusModifyWaitEvent(WaitEventSet *set, int pos, uint32 events,
+								 Latch *latch);
 static TaskPlacementExecution * PopPlacementExecution(WorkerSession *session);
 static TaskPlacementExecution * PopAssignedPlacementExecution(WorkerSession *session);
 static TaskPlacementExecution * PopUnassignedPlacementExecution(WorkerPool *workerPool);
@@ -671,6 +677,8 @@ static void ExtractParametersForRemoteExecution(ParamListInfo paramListInfo,
 												Oid **parameterTypes,
 												const char ***parameterValues);
 static int GetEventSetSize(List *sessionList);
+static bool ProcessSessionsWithFailedWaitEventSetOperations(
+	DistributedExecution *execution);
 static int RebuildWaitEventSet(DistributedExecution *execution);
 static void ProcessWaitEvents(DistributedExecution *execution, WaitEvent *events, int
 							  eventCount, bool *cancellationReceived);
@@ -1165,23 +1173,6 @@ DecideTransactionPropertiesForTaskList(RowModifyLevel modLevel, List *taskList,
 		return xactProperties;
 	}

-	if (GetCurrentLocalExecutionStatus() == LOCAL_EXECUTION_REQUIRED)
-	{
-		/*
-		 * In case localExecutionHappened, we force the executor to use 2PC.
-		 * The primary motivation is that at this point we're definitely expanding
-		 * the nodes participated in the transaction. And, by re-generating the
-		 * remote task lists during local query execution, we might prevent the adaptive
-		 * executor to kick-in 2PC (or even start coordinated transaction, that's why
-		 * we prefer adding this check here instead of
-		 * Activate2PCIfModifyingTransactionExpandsToNewNode()).
-		 */
-		xactProperties.errorOnAnyFailure = true;
-		xactProperties.useRemoteTransactionBlocks = TRANSACTION_BLOCKS_REQUIRED;
-		xactProperties.requires2PC = true;
-		return xactProperties;
-	}
-
 	if (DistributedExecutionRequiresRollback(taskList))
 	{
 		/* transaction blocks are required if the task list needs to roll back */
@@ -1240,7 +1231,7 @@ StartDistributedExecution(DistributedExecution *execution)

 	if (xactProperties->requires2PC)
 	{
-		CoordinatedTransactionUse2PC();
+		CoordinatedTransactionShouldUse2PC();
 	}

 	/*
@@ -2092,6 +2083,7 @@ FindOrCreateWorkerSession(WorkerPool *workerPool, MultiConnection *connection)
 	session->connection = connection;
 	session->workerPool = workerPool;
 	session->commandsSent = 0;
+	session->waitEventSetIndex = WAIT_EVENT_SET_INDEX_NOT_INITIALIZED;

 	dlist_init(&session->pendingTaskQueue);
 	dlist_init(&session->readyTaskQueue);
@@ -2236,6 +2228,7 @@ RunDistributedExecution(DistributedExecution *execution)
 			ManageWorkerPool(workerPool);
 		}

+		bool skipWaitEvents = false;
 		if (execution->remoteTaskList == NIL)
 		{
 			/*
@@ -2257,11 +2250,28 @@ RunDistributedExecution(DistributedExecution *execution)
 			}
 			eventSetSize = RebuildWaitEventSet(execution);
 			events = palloc0(eventSetSize * sizeof(WaitEvent));
+
+			skipWaitEvents =
+				ProcessSessionsWithFailedWaitEventSetOperations(execution);
 		}
 		else if (execution->waitFlagsChanged)
 		{
 			RebuildWaitEventSetFlags(execution->waitEventSet, execution->sessionList);
 			execution->waitFlagsChanged = false;
+
+			skipWaitEvents =
+				ProcessSessionsWithFailedWaitEventSetOperations(execution);
+		}
+
+		if (skipWaitEvents)
+		{
+			/*
+			 * Some operation on the wait event set is failed, retry
+			 * as we already removed the problematic connections.
+			 */
+			execution->rebuildWaitEventSet = true;
+
+			continue;
 		}

 		/* wait for I/O events */
@@ -2310,6 +2320,51 @@ RunDistributedExecution(DistributedExecution *execution)
 }


+/*
+ * ProcessSessionsWithFailedEventSetOperations goes over the session list and
+ * processes sessions with failed wait event set operations.
+ *
+ * Failed sessions are not going to generate any further events, so it is our
+ * only chance to process the failure by calling into `ConnectionStateMachine`.
+ *
+ * The function returns true if any session failed.
+ */
+static bool
+ProcessSessionsWithFailedWaitEventSetOperations(DistributedExecution *execution)
+{
+	bool foundFailedSession = false;
+	WorkerSession *session = NULL;
+	foreach_ptr(session, execution->sessionList)
+	{
+		if (session->waitEventSetIndex == WAIT_EVENT_SET_INDEX_FAILED)
+		{
+			/*
+			 * We can only lost only already connected connections,
+			 * others are regular failures.
+			 */
+			MultiConnection *connection = session->connection;
+			if (connection->connectionState == MULTI_CONNECTION_CONNECTED)
+			{
+				connection->connectionState = MULTI_CONNECTION_LOST;
+			}
+			else
+			{
+				connection->connectionState = MULTI_CONNECTION_FAILED;
+			}
+
+
+			ConnectionStateMachine(session);
+
+			session->waitEventSetIndex = WAIT_EVENT_SET_INDEX_NOT_INITIALIZED;
+
+			foundFailedSession = true;
+		}
+	}
+
+	return foundFailedSession;
+}
+
+
 /*
  * RebuildWaitEventSet updates the waitEventSet for the distributed execution.
  * This happens when the connection set for the distributed execution is changed,
@@ -3197,7 +3252,7 @@ Activate2PCIfModifyingTransactionExpandsToNewNode(WorkerSession *session)
 		 * just opened, which means we're now going to make modifications
 		 * over multiple connections. Activate 2PC!
 		 */
-		CoordinatedTransactionUse2PC();
+		CoordinatedTransactionShouldUse2PC();
 	}
 }

@@ -3875,6 +3930,7 @@ ReceiveResults(WorkerSession *session, bool storeRows)
 		TupleDesc tupleDescriptor = tupleDest->tupleDescForQuery(tupleDest, queryIndex);
 		if (tupleDescriptor == NULL)
 		{
+			PQclear(result);
 			continue;
 		}

@@ -4693,18 +4749,79 @@ BuildWaitEventSet(List *sessionList)
 			continue;
 		}

-		int waitEventSetIndex = AddWaitEventToSet(waitEventSet, connection->waitFlags,
-												  sock, NULL, (void *) session);
+		int waitEventSetIndex =
+			CitusAddWaitEventSetToSet(waitEventSet, connection->waitFlags, sock,
+									  NULL, (void *) session);
 		session->waitEventSetIndex = waitEventSetIndex;
 	}

-	AddWaitEventToSet(waitEventSet, WL_POSTMASTER_DEATH, PGINVALID_SOCKET, NULL, NULL);
-	AddWaitEventToSet(waitEventSet, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
+	CitusAddWaitEventSetToSet(waitEventSet, WL_POSTMASTER_DEATH, PGINVALID_SOCKET, NULL,
+							  NULL);
+	CitusAddWaitEventSetToSet(waitEventSet, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
+							  NULL);

 	return waitEventSet;
 }
+
+
+/*
+ * CitusAddWaitEventSetToSet is a wrapper around Postgres' AddWaitEventToSet().
+ *
+ * AddWaitEventToSet() may throw hard errors. For example, when the
+ * underlying socket for a connection is closed by the remote server
+ * and already reflected by the OS, however Citus hasn't had a chance
+ * to get this information. In that case, if replication factor is >1,
+ * Citus can failover to other nodes for executing the query. Even if
+ * replication factor = 1, Citus can give much nicer errors.
+ *
+ * So CitusAddWaitEventSetToSet simply puts ModifyWaitEvent into a
+ * PG_TRY/PG_CATCH block in order to catch any hard errors, and
+ * returns this information to the caller.
+ */
+static int
+CitusAddWaitEventSetToSet(WaitEventSet *set, uint32 events, pgsocket fd,
+						  Latch *latch, void *user_data)
+{
+	volatile int waitEventSetIndex = WAIT_EVENT_SET_INDEX_NOT_INITIALIZED;
+	MemoryContext savedContext = CurrentMemoryContext;
+
+	PG_TRY();
+	{
+		waitEventSetIndex =
+			AddWaitEventToSet(set, events, fd, latch, (void *) user_data);
+	}
+	PG_CATCH();
+	{
+		/*
+		 * We might be in an arbitrary memory context when the
+		 * error is thrown and we should get back to one we had
+		 * at PG_TRY() time, especially because we are not
+		 * re-throwing the error.
+		 */
+		MemoryContextSwitchTo(savedContext);
+
+		FlushErrorState();
+
+		if (user_data != NULL)
+		{
+			WorkerSession *workerSession = (WorkerSession *) user_data;
+
+			ereport(DEBUG1, (errcode(ERRCODE_CONNECTION_FAILURE),
+							 errmsg("Adding wait event for node %s:%d failed. "
+									"The socket was: %d",
+									workerSession->workerPool->nodeName,
+									workerSession->workerPool->nodePort, fd)));
+		}
+
+		/* let the callers know about the failure */
+		waitEventSetIndex = WAIT_EVENT_SET_INDEX_FAILED;
+	}
+	PG_END_TRY();
+
+	return waitEventSetIndex;
+}
+
+
 /*
  * GetEventSetSize returns the event set size for a list of sessions.
  */
@@ -4748,11 +4865,68 @@ RebuildWaitEventSetFlags(WaitEventSet *waitEventSet, List *sessionList)
 			continue;
 		}

-		ModifyWaitEvent(waitEventSet, waitEventSetIndex, connection->waitFlags, NULL);
+		bool success =
+			CitusModifyWaitEvent(waitEventSet, waitEventSetIndex,
+								 connection->waitFlags, NULL);
+		if (!success)
+		{
+			ereport(DEBUG1, (errcode(ERRCODE_CONNECTION_FAILURE),
+							 errmsg("Modifying wait event for node %s:%d failed. "
+									"The wait event index was: %d",
+									connection->hostname, connection->port,
+									waitEventSetIndex)));
+
+			session->waitEventSetIndex = WAIT_EVENT_SET_INDEX_FAILED;
+		}
 	}
 }
+
+
+/*
+ * CitusModifyWaitEvent is a wrapper around Postgres' ModifyWaitEvent().
+ *
+ * ModifyWaitEvent may throw hard errors. For example, when the underlying
+ * socket for a connection is closed by the remote server and already
+ * reflected by the OS, however Citus hasn't had a chance to get this
+ * information. In that case, if repliction factor is >1, Citus can
+ * failover to other nodes for executing the query. Even if replication
+ * factor = 1, Citus can give much nicer errors.
+ *
+ * So CitusModifyWaitEvent simply puts ModifyWaitEvent into a PG_TRY/PG_CATCH
+ * block in order to catch any hard errors, and returns this information to the
+ * caller.
+ */
+static bool
+CitusModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
+{
+	volatile bool success = true;
+	MemoryContext savedContext = CurrentMemoryContext;
+
+	PG_TRY();
+	{
+		ModifyWaitEvent(set, pos, events, latch);
+	}
+	PG_CATCH();
+	{
+		/*
+		 * We might be in an arbitrary memory context when the
+		 * error is thrown and we should get back to one we had
+		 * at PG_TRY() time, especially because we are not
+		 * re-throwing the error.
+		 */
+		MemoryContextSwitchTo(savedContext);
+
+		FlushErrorState();
+
+		/* let the callers know about the failure */
+		success = false;
+	}
+	PG_END_TRY();
+
+	return success;
+}
+
+
 /*
  * SetLocalForceMaxQueryParallelization is simply a C interface for setting
  * the following:
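Both wrappers above use PostgreSQL's PG_TRY/PG_CATCH machinery, which is built on sigsetjmp, to turn an error thrown deep inside AddWaitEventToSet or ModifyWaitEvent into an ordinary return value instead of aborting the whole execution. A standalone sketch of that convert-an-error-into-a-status pattern using plain setjmp (an analogy only, not PostgreSQL code; all names are hypothetical):

#include <setjmp.h>
#include <stdbool.h>
#include <stdio.h>

static jmp_buf errorContext;

/* stand-in for a routine that reports failure by jumping out, like elog(ERROR) */
static void
failingOperation(bool shouldFail)
{
    if (shouldFail)
    {
        longjmp(errorContext, 1);
    }
}

/*
 * Converts a "hard error" into a boolean result, in the same spirit as
 * CitusModifyWaitEvent: set a recovery point, run the operation, and if
 * control comes back through the recovery point report failure instead
 * of propagating it.
 */
static bool
tryOperation(bool shouldFail)
{
    if (setjmp(errorContext) != 0)
    {
        /* the analogue of restoring the saved memory context and flushing error state */
        return false;
    }

    failingOperation(shouldFail);
    return true;
}

int
main(void)
{
    printf("success path: %s\n", tryOperation(false) ? "ok" : "failed");
    printf("failure path: %s\n", tryOperation(true) ? "ok" : "failed");
    return 0;
}

Restoring saved state before returning, as the diff's comments stress, is essential because the caught error may have left the process in an arbitrary intermediate state.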
@@ -300,7 +300,8 @@ CitusBeginReadOnlyScan(CustomScanState *node, EState *estate, int eflags)
 		 * The plan will be cached across executions when originalDistributedPlan
 		 * represents a prepared statement.
 		 */
-		CacheLocalPlanForShardQuery(task, originalDistributedPlan);
+		CacheLocalPlanForShardQuery(task, originalDistributedPlan,
+									estate->es_param_list_info);
 	}
 }

@@ -342,9 +343,12 @@ CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags)
 		/*
 		 * At this point, we're about to do the shard pruning for fast-path queries.
 		 * Given that pruning is deferred always for INSERTs, we get here
-		 * !EnableFastPathRouterPlanner as well.
+		 * !EnableFastPathRouterPlanner as well. Given that INSERT statements with
+		 * CTEs/sublinks etc are not eligible for fast-path router plan, we get here
+		 * jobQuery->commandType == CMD_INSERT as well.
 		 */
-		Assert(currentPlan->fastPathRouterPlan || !EnableFastPathRouterPlanner);
+		Assert(currentPlan->fastPathRouterPlan || !EnableFastPathRouterPlanner ||
+			   jobQuery->commandType == CMD_INSERT);

 		/*
 		 * We can only now decide which shard to use, so we need to build a new task
@@ -406,7 +410,8 @@ CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags)
 		 * The plan will be cached across executions when originalDistributedPlan
 		 * represents a prepared statement.
 		 */
-		CacheLocalPlanForShardQuery(task, originalDistributedPlan);
+		CacheLocalPlanForShardQuery(task, originalDistributedPlan,
+									estate->es_param_list_info);
 	}
 }

@@ -125,9 +125,6 @@ static void LogLocalCommand(Task *task);
 static uint64 LocallyPlanAndExecuteMultipleQueries(List *queryStrings,
 												   TupleDestination *tupleDest,
 												   Task *task);
-static void ExtractParametersForLocalExecution(ParamListInfo paramListInfo,
-											   Oid **parameterTypes,
-											   const char ***parameterValues);
 static void ExecuteUdfTaskQuery(Query *localUdfCommandQuery);
 static void EnsureTransitionPossible(LocalExecutionStatus from,
 									 LocalExecutionStatus to);
@@ -209,6 +206,19 @@ ExecuteLocalTaskListExtended(List *taskList,
 	Oid *parameterTypes = NULL;
 	uint64 totalRowsProcessed = 0;

+	/*
+	 * Even if we are executing local tasks, we still enable
+	 * coordinated transaction. This is because
+	 * (a) we might be in a transaction, and the next commands may
+	 * require coordinated transaction
+	 * (b) we might be executing some tasks locally and the others
+	 * via remote execution
+	 *
+	 * Also, there is no harm enabling coordinated transaction even if
+	 * we only deal with local tasks in the transaction.
+	 */
+	UseCoordinatedTransaction();
+
 	if (paramListInfo != NULL)
 	{
 		/* not used anywhere, so declare here */
@@ -236,6 +246,17 @@ ExecuteLocalTaskListExtended(List *taskList,
 		{
 			SetLocalExecutionStatus(LOCAL_EXECUTION_REQUIRED);
 		}

+		if (!ReadOnlyTask(task->taskType))
+		{
+			/*
+			 * Any modification on the local execution should enable 2PC. If remote
+			 * queries are also ReadOnly, our 2PC logic is smart enough to skip sending
+			 * PREPARE to those connections.
+			 */
+			CoordinatedTransactionShouldUse2PC();
+		}
+
 		LogLocalCommand(task);

 		if (isUtilityCommand)
@@ -362,7 +383,7 @@ LocallyPlanAndExecuteMultipleQueries(List *queryStrings, TupleDestination *tuple
  * value arrays. It does not change the oid of custom types, because the
  * query will be run locally.
  */
-static void
+void
 ExtractParametersForLocalExecution(ParamListInfo paramListInfo, Oid **parameterTypes,
 								   const char ***parameterValues)
 {
@@ -406,7 +427,7 @@ ExecuteUtilityCommand(const char *taskQueryCommand)
 		 * process utility.
 		 */
 		ProcessUtilityParseTree(taskRawParseTree, taskQueryCommand,
-								PROCESS_UTILITY_TOPLEVEL, NULL, None_Receiver,
+								PROCESS_UTILITY_QUERY, NULL, None_Receiver,
 								NULL);
 	}
 }
@@ -156,7 +156,6 @@ static void ApplyAddToDependencyList(ObjectAddressCollector *collector,
 static List * ExpandCitusSupportedTypes(ObjectAddressCollector *collector,
 										ObjectAddress target);
 static ViewDependencyNode * BuildViewDependencyGraph(Oid relationId, HTAB *nodeMap);
-static Oid GetDependingView(Form_pg_depend pg_depend);


 /*
@@ -1204,18 +1203,31 @@ GetDependingView(Form_pg_depend pg_depend)
 											 true, NULL, 1, rkey);

 	HeapTuple rewriteTup = systable_getnext(rscan);
+	if (!HeapTupleIsValid(rewriteTup))
+	{
+		/*
+		 * This function already verified that objid's classid is
+		 * RewriteRelationId, so it should exists. But be on the
+		 * safe side.
+		 */
+		ereport(ERROR, (errmsg("catalog lookup failed for view %u",
+							   pg_depend->objid)));
+	}
+
 	Form_pg_rewrite pg_rewrite = (Form_pg_rewrite) GETSTRUCT(rewriteTup);

 	bool isView = get_rel_relkind(pg_rewrite->ev_class) == RELKIND_VIEW;
 	bool isMatView = get_rel_relkind(pg_rewrite->ev_class) == RELKIND_MATVIEW;
 	bool isDifferentThanRef = pg_rewrite->ev_class != pg_depend->refobjid;

+	Oid dependingView = InvalidOid;
+	if ((isView || isMatView) && isDifferentThanRef)
+	{
+		dependingView = pg_rewrite->ev_class;
+	}
+
 	systable_endscan(rscan);
 	relation_close(rewriteRel, AccessShareLock);

-	if ((isView || isMatView) && isDifferentThanRef)
-	{
-		return pg_rewrite->ev_class;
-	}
-	return InvalidOid;
+	return dependingView;
 }
@@ -373,3 +373,56 @@ GetDistributedObjectAddressList(void)

     return objectAddressList;
 }
+
+
+/*
+ * UpdateDistributedObjectColocationId gets an old and a new colocationId
+ * and updates the colocationId of all tuples in citus.pg_dist_object which
+ * have the old colocationId to the new colocationId.
+ */
+void
+UpdateDistributedObjectColocationId(uint32 oldColocationId,
+                                    uint32 newColocationId)
+{
+    const bool indexOK = false;
+    ScanKeyData scanKey[1];
+    Relation pgDistObjectRel = table_open(DistObjectRelationId(),
+                                          RowExclusiveLock);
+    TupleDesc tupleDescriptor = RelationGetDescr(pgDistObjectRel);
+
+    /* scan pg_dist_object for colocationId equal to old colocationId */
+    ScanKeyInit(&scanKey[0], Anum_pg_dist_object_colocationid,
+                BTEqualStrategyNumber,
+                F_INT4EQ, UInt32GetDatum(oldColocationId));
+
+    SysScanDesc scanDescriptor = systable_beginscan(pgDistObjectRel,
+                                                    InvalidOid,
+                                                    indexOK,
+                                                    NULL, 1, scanKey);
+    HeapTuple heapTuple;
+    while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
+    {
+        Datum values[Natts_pg_dist_object];
+        bool isnull[Natts_pg_dist_object];
+        bool replace[Natts_pg_dist_object];
+
+        memset(replace, 0, sizeof(replace));
+
+        replace[Anum_pg_dist_object_colocationid - 1] = true;
+
+        /* update the colocationId to the new one */
+        values[Anum_pg_dist_object_colocationid - 1] = UInt32GetDatum(newColocationId);
+
+        isnull[Anum_pg_dist_object_colocationid - 1] = false;
+
+        heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull,
+                                      replace);
+
+        CatalogTupleUpdate(pgDistObjectRel, &heapTuple->t_self, heapTuple);
+        CitusInvalidateRelcacheByRelid(DistObjectRelationId());
+    }
+
+    systable_endscan(scanDescriptor);
+    table_close(pgDistObjectRel, NoLock);
+    CommandCounterIncrement();
+}
@@ -79,14 +79,24 @@ static bool DistributedTableSizeOnWorker(WorkerNode *workerNode, Oid relationId,
                                          char *sizeQuery, bool failOnError,
                                          uint64 *tableSize);
 static List * ShardIntervalsOnWorkerGroup(WorkerNode *workerNode, Oid relationId);
-static char * GenerateShardNameAndSizeQueryForShardList(List *shardIntervalList);
-static char * GenerateAllShardNameAndSizeQueryForNode(WorkerNode *workerNode);
-static List * GenerateShardSizesQueryList(List *workerNodeList);
+static char * GenerateShardStatisticsQueryForShardList(List *shardIntervalList, bool
+                                                       useShardMinMaxQuery);
+static char * GenerateAllShardStatisticsQueryForNode(WorkerNode *workerNode,
+                                                     List *citusTableIds, bool
+                                                     useShardMinMaxQuery);
+static List * GenerateShardStatisticsQueryList(List *workerNodeList, List *citusTableIds,
+                                               bool useShardMinMaxQuery);
 static void ErrorIfNotSuitableToGetSize(Oid relationId);
 static List * OpenConnectionToNodes(List *workerNodeList);
 static void ReceiveShardNameAndSizeResults(List *connectionList,
                                            Tuplestorestate *tupleStore,
                                            TupleDesc tupleDescriptor);
+static void AppendShardSizeMinMaxQuery(StringInfo selectQuery, uint64 shardId,
+                                       ShardInterval *
+                                       shardInterval, char *shardName,
+                                       char *quotedShardName);
+static void AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval,
+                                 char *quotedShardName);

 /* exports for SQL callable functions */
 PG_FUNCTION_INFO_V1(citus_table_size);
@@ -102,25 +112,16 @@ citus_shard_sizes(PG_FUNCTION_ARGS)
 {
     CheckCitusVersion(ERROR);

-    List *workerNodeList = ActivePrimaryNodeList(NoLock);
+    List *allCitusTableIds = AllCitusTableIds();

-    List *shardSizesQueryList = GenerateShardSizesQueryList(workerNodeList);
+    /* we don't need a distributed transaction here */
+    bool useDistributedTransaction = false;

-    List *connectionList = OpenConnectionToNodes(workerNodeList);
-    FinishConnectionListEstablishment(connectionList);
-
-    /* send commands in parallel */
-    for (int i = 0; i < list_length(connectionList); i++)
-    {
-        MultiConnection *connection = (MultiConnection *) list_nth(connectionList, i);
-        char *shardSizesQuery = (char *) list_nth(shardSizesQueryList, i);
-        int querySent = SendRemoteCommand(connection, shardSizesQuery);
-        if (querySent == 0)
-        {
-            ReportConnectionError(connection, WARNING);
-        }
-    }
+    /* we only want the shard sizes here so useShardMinMaxQuery parameter is false */
+    bool useShardMinMaxQuery = false;
+    List *connectionList = SendShardStatisticsQueriesInParallel(allCitusTableIds,
+                                                                useDistributedTransaction,
+                                                                useShardMinMaxQuery);

     TupleDesc tupleDescriptor = NULL;
     Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
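Note: `citus_shard_sizes()` keeps its SQL interface after this refactor; only the internal plumbing moves into `SendShardStatisticsQueriesInParallel()`. A minimal usage sketch (illustrative only, not part of the diff):

    SELECT * FROM citus_shard_sizes();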
@@ -225,6 +226,59 @@ citus_relation_size(PG_FUNCTION_ARGS)
 }


+/*
+ * SendShardStatisticsQueriesInParallel generates query lists for obtaining shard
+ * statistics and then sends the commands in parallel by opening connections
+ * to available nodes. It returns the connection list.
+ */
+List *
+SendShardStatisticsQueriesInParallel(List *citusTableIds, bool useDistributedTransaction,
+                                     bool
+                                     useShardMinMaxQuery)
+{
+    List *workerNodeList = ActivePrimaryNodeList(NoLock);
+
+    List *shardSizesQueryList = GenerateShardStatisticsQueryList(workerNodeList,
+                                                                 citusTableIds,
+                                                                 useShardMinMaxQuery);
+
+    List *connectionList = OpenConnectionToNodes(workerNodeList);
+    FinishConnectionListEstablishment(connectionList);
+
+    if (useDistributedTransaction)
+    {
+        /*
+         * For now, in the case we want to include shard min and max values, we also
+         * want to update the entries in pg_dist_placement and pg_dist_shard with the
+         * latest statistics. In order to detect distributed deadlocks, we assign a
+         * distributed transaction ID to the current transaction
+         */
+        UseCoordinatedTransaction();
+    }
+
+    /* send commands in parallel */
+    for (int i = 0; i < list_length(connectionList); i++)
+    {
+        MultiConnection *connection = (MultiConnection *) list_nth(connectionList, i);
+        char *shardSizesQuery = (char *) list_nth(shardSizesQueryList, i);
+
+        if (useDistributedTransaction)
+        {
+            /* run the size query in a distributed transaction */
+            RemoteTransactionBeginIfNecessary(connection);
+        }
+
+        int querySent = SendRemoteCommand(connection, shardSizesQuery);
+
+        if (querySent == 0)
+        {
+            ReportConnectionError(connection, WARNING);
+        }
+    }
+    return connectionList;
+}
+
+
 /*
  * OpenConnectionToNodes opens a single connection per node
  * for the given workerNodeList.
@@ -250,20 +304,25 @@ OpenConnectionToNodes(List *workerNodeList)


 /*
- * GenerateShardSizesQueryList generates a query per node that
- * will return all shard_name, shard_size pairs from the node.
+ * GenerateShardStatisticsQueryList generates a query per node that will return:
+ * - all shard_name, shard_size pairs from the node (if includeShardMinMax is false)
+ * - all shard_id, shard_minvalue, shard_maxvalue, shard_size quartuples from the node (if true)
  */
 static List *
-GenerateShardSizesQueryList(List *workerNodeList)
+GenerateShardStatisticsQueryList(List *workerNodeList, List *citusTableIds, bool
+                                 useShardMinMaxQuery)
 {
-    List *shardSizesQueryList = NIL;
+    List *shardStatisticsQueryList = NIL;
     WorkerNode *workerNode = NULL;
     foreach_ptr(workerNode, workerNodeList)
     {
-        char *shardSizesQuery = GenerateAllShardNameAndSizeQueryForNode(workerNode);
-        shardSizesQueryList = lappend(shardSizesQueryList, shardSizesQuery);
+        char *shardStatisticsQuery = GenerateAllShardStatisticsQueryForNode(workerNode,
+                                                                            citusTableIds,
+                                                                            useShardMinMaxQuery);
+        shardStatisticsQueryList = lappend(shardStatisticsQueryList,
+                                           shardStatisticsQuery);
     }
-    return shardSizesQueryList;
+    return shardStatisticsQueryList;
 }
@@ -572,37 +631,50 @@ GenerateSizeQueryOnMultiplePlacements(List *shardIntervalList, char *sizeQuery)


 /*
- * GenerateAllShardNameAndSizeQueryForNode generates a query that returns all
- * shard_name, shard_size pairs for the given node.
+ * GenerateAllShardStatisticsQueryForNode generates a query that returns:
+ * - all shard_name, shard_size pairs for the given node (if useShardMinMaxQuery is false)
+ * - all shard_id, shard_minvalue, shard_maxvalue, shard_size quartuples (if true)
  */
 static char *
-GenerateAllShardNameAndSizeQueryForNode(WorkerNode *workerNode)
+GenerateAllShardStatisticsQueryForNode(WorkerNode *workerNode, List *citusTableIds, bool
+                                       useShardMinMaxQuery)
 {
-    List *allCitusTableIds = AllCitusTableIds();
-
-    StringInfo allShardNameAndSizeQuery = makeStringInfo();
+    StringInfo allShardStatisticsQuery = makeStringInfo();

     Oid relationId = InvalidOid;
-    foreach_oid(relationId, allCitusTableIds)
+    foreach_oid(relationId, citusTableIds)
     {
         List *shardIntervalsOnNode = ShardIntervalsOnWorkerGroup(workerNode, relationId);
-        char *shardNameAndSizeQuery =
-            GenerateShardNameAndSizeQueryForShardList(shardIntervalsOnNode);
-        appendStringInfoString(allShardNameAndSizeQuery, shardNameAndSizeQuery);
+        char *shardStatisticsQuery =
+            GenerateShardStatisticsQueryForShardList(shardIntervalsOnNode,
+                                                     useShardMinMaxQuery);
+        appendStringInfoString(allShardStatisticsQuery, shardStatisticsQuery);
     }

     /* Add a dummy entry so that UNION ALL doesn't complain */
-    appendStringInfo(allShardNameAndSizeQuery, "SELECT NULL::text, 0::bigint;");
-    return allShardNameAndSizeQuery->data;
+    if (useShardMinMaxQuery)
+    {
+        /* 0 for shard_id, NULL for min, NULL for text, 0 for shard_size */
+        appendStringInfo(allShardStatisticsQuery,
+                         "SELECT 0::bigint, NULL::text, NULL::text, 0::bigint;");
+    }
+    else
+    {
+        /* NULL for shard_name, 0 for shard_size */
+        appendStringInfo(allShardStatisticsQuery, "SELECT NULL::text, 0::bigint;");
+    }
+    return allShardStatisticsQuery->data;
 }


 /*
- * GenerateShardNameAndSizeQueryForShardList generates a SELECT shard_name - shard_size query to get
- * size of multiple tables.
+ * GenerateShardStatisticsQueryForShardList generates one of the two types of queries:
+ * - SELECT shard_name - shard_size (if useShardMinMaxQuery is false)
+ * - SELECT shard_id, shard_minvalue, shard_maxvalue, shard_size (if true)
  */
 static char *
-GenerateShardNameAndSizeQueryForShardList(List *shardIntervalList)
+GenerateShardStatisticsQueryForShardList(List *shardIntervalList, bool
+                                         useShardMinMaxQuery)
 {
     StringInfo selectQuery = makeStringInfo();

@@ -618,8 +690,15 @@ GenerateShardNameAndSizeQueryForShardList(List *shardIntervalList)
         char *shardQualifiedName = quote_qualified_identifier(schemaName, shardName);
         char *quotedShardName = quote_literal_cstr(shardQualifiedName);

-        appendStringInfo(selectQuery, "SELECT %s AS shard_name, ", quotedShardName);
-        appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName);
+        if (useShardMinMaxQuery)
+        {
+            AppendShardSizeMinMaxQuery(selectQuery, shardId, shardInterval, shardName,
+                                       quotedShardName);
+        }
+        else
+        {
+            AppendShardSizeQuery(selectQuery, shardInterval, quotedShardName);
+        }
         appendStringInfo(selectQuery, " UNION ALL ");
     }

@@ -627,6 +706,54 @@ GenerateShardNameAndSizeQueryForShardList(List *shardIntervalList)
 }


+/*
+ * AppendShardSizeMinMaxQuery appends a query in the following form to selectQuery
+ * SELECT shard_id, shard_minvalue, shard_maxvalue, shard_size
+ */
+static void
+AppendShardSizeMinMaxQuery(StringInfo selectQuery, uint64 shardId,
+                           ShardInterval *shardInterval, char *shardName,
+                           char *quotedShardName)
+{
+    if (IsCitusTableType(shardInterval->relationId, APPEND_DISTRIBUTED))
+    {
+        /* fill in the partition column name */
+        const uint32 unusedTableId = 1;
+        Var *partitionColumn = PartitionColumn(shardInterval->relationId,
+                                               unusedTableId);
+        char *partitionColumnName = get_attname(shardInterval->relationId,
+                                                partitionColumn->varattno, false);
+        appendStringInfo(selectQuery,
+                         "SELECT " UINT64_FORMAT
+                         " AS shard_id, min(%s)::text AS shard_minvalue, max(%s)::text AS shard_maxvalue, pg_relation_size(%s) AS shard_size FROM %s ",
+                         shardId, partitionColumnName,
+                         partitionColumnName,
+                         quotedShardName, shardName);
+    }
+    else
+    {
+        /* we don't need to update min/max for non-append distributed tables because they don't change */
+        appendStringInfo(selectQuery,
+                         "SELECT " UINT64_FORMAT
+                         " AS shard_id, NULL::text AS shard_minvalue, NULL::text AS shard_maxvalue, pg_relation_size(%s) AS shard_size ",
+                         shardId, quotedShardName);
+    }
+}
+
+
+/*
+ * AppendShardSizeQuery appends a query in the following form to selectQuery
+ * SELECT shard_name, shard_size
+ */
+static void
+AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval,
+                     char *quotedShardName)
+{
+    appendStringInfo(selectQuery, "SELECT %s AS shard_name, ", quotedShardName);
+    appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName);
+}
+
+
 /*
  * ErrorIfNotSuitableToGetSize determines whether the table is suitable to find
  * its' size with internal functions.
@@ -924,6 +1051,26 @@ ShardLength(uint64 shardId)
 }


+/*
+ * NodeGroupHasLivePlacements returns true if there is any placement
+ * on the given node group which is not a SHARD_STATE_TO_DELETE placement.
+ */
+bool
+NodeGroupHasLivePlacements(int32 groupId)
+{
+    List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId);
+    GroupShardPlacement *placement = NULL;
+    foreach_ptr(placement, shardPlacements)
+    {
+        if (placement->shardState != SHARD_STATE_TO_DELETE)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+
 /*
  * NodeGroupHasShardPlacements returns whether any active shards are placed on the group
  */
@@ -112,7 +112,7 @@ static bool UnsetMetadataSyncedForAll(void);
 static void ErrorIfCoordinatorMetadataSetFalse(WorkerNode *workerNode, Datum value,
                                                char *field);
 static WorkerNode * SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards);
+static void RemoveOldShardPlacementForNodeGroup(int groupId);

 /* declarations for dynamic loading */
 PG_FUNCTION_INFO_V1(citus_set_coordinator_host);
@@ -1291,9 +1291,7 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
         */
        DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId);
    }
-   bool onlyConsiderActivePlacements = false;
-   if (NodeGroupHasShardPlacements(workerNode->groupId,
-                                   onlyConsiderActivePlacements))
+   if (NodeGroupHasLivePlacements(workerNode->groupId))
    {
        if (ClusterHasReferenceTable())
        {
@@ -1320,6 +1318,8 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)

    DeleteNodeRow(workerNode->workerName, nodePort);

+   RemoveOldShardPlacementForNodeGroup(workerNode->groupId);
+
    char *nodeDeleteCommand = NodeDeleteCommand(workerNode->nodeId);

    /* make sure we don't have any lingering session lifespan connections */
@@ -1329,6 +1329,29 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
 }


+/*
+ * RemoveOldShardPlacementForNodeGroup removes all old shard placements
+ * for the given node group from pg_dist_placement.
+ */
+static void
+RemoveOldShardPlacementForNodeGroup(int groupId)
+{
+    /*
+     * Prevent concurrent deferred drop
+     */
+    LockPlacementCleanup();
+    List *shardPlacementsOnNode = AllShardPlacementsOnNodeGroup(groupId);
+    GroupShardPlacement *placement = NULL;
+    foreach_ptr(placement, shardPlacementsOnNode)
+    {
+        if (placement->shardState == SHARD_STATE_TO_DELETE)
+        {
+            DeleteShardPlacementRow(placement->placementId);
+        }
+    }
+}
+
+
 /*
  * CanRemoveReferenceTablePlacements returns true if active primary
  * node count is more than 1, which means that even if we remove a node
@@ -1384,16 +1407,34 @@ AddNodeMetadata(char *nodeName, int32 nodePort,
     *nodeAlreadyExists = false;

     /*
-     * Take an exclusive lock on pg_dist_node to serialize node changes.
+     * Prevent / wait for concurrent modification before checking whether
+     * the worker already exists in pg_dist_node.
+     */
+    LockRelationOid(DistNodeRelationId(), RowShareLock);
+
+    WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
+    if (workerNode != NULL)
+    {
+        /* return early without holding locks when the node already exists */
+        *nodeAlreadyExists = true;
+
+        return workerNode->nodeId;
+    }
+
+    /*
+     * We are going to change pg_dist_node, prevent any concurrent reads that
+     * are not tolerant to concurrent node addition by taking an exclusive
+     * lock (conflicts with all but AccessShareLock).
+     *
      * We may want to relax or have more fine-grained locking in the future
      * to allow users to add multiple nodes concurrently.
      */
     LockRelationOid(DistNodeRelationId(), ExclusiveLock);

-    WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
+    /* recheck in case 2 node additions pass the first check concurrently */
+    workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
     if (workerNode != NULL)
     {
-        /* fill return data and return */
         *nodeAlreadyExists = true;

         return workerNode->nodeId;
@@ -1800,7 +1841,7 @@ InsertPlaceholderCoordinatorRecord(void)
     bool nodeAlreadyExists = false;

     /* as long as there is a single node, localhost should be ok */
-    AddNodeMetadata("localhost", PostPortNumber, &nodeMetadata, &nodeAlreadyExists);
+    AddNodeMetadata(LocalHostName, PostPortNumber, &nodeMetadata, &nodeAlreadyExists);
 }

@@ -332,7 +332,7 @@ DropShards(Oid relationId, char *schemaName, char *relationName,
      */
     if (MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
     {
-        CoordinatedTransactionUse2PC();
+        CoordinatedTransactionShouldUse2PC();
     }

     List *dropTaskList = DropTaskList(relationId, schemaName, relationName,
@@ -85,6 +85,7 @@ PG_FUNCTION_INFO_V1(master_get_table_ddl_events);
 PG_FUNCTION_INFO_V1(master_get_new_shardid);
 PG_FUNCTION_INFO_V1(master_get_new_placementid);
 PG_FUNCTION_INFO_V1(master_get_active_worker_nodes);
+PG_FUNCTION_INFO_V1(citus_get_active_worker_nodes);
 PG_FUNCTION_INFO_V1(master_get_round_robin_candidate_nodes);
 PG_FUNCTION_INFO_V1(master_stage_shard_row);
 PG_FUNCTION_INFO_V1(master_stage_shard_placement_row);
@@ -442,12 +443,12 @@ master_stage_shard_placement_row(PG_FUNCTION_ARGS)


 /*
- * master_get_active_worker_nodes returns a set of active worker host names and
+ * citus_get_active_worker_nodes returns a set of active worker host names and
  * port numbers in deterministic order. Currently we assume that all worker
  * nodes in pg_dist_node are active.
  */
 Datum
-master_get_active_worker_nodes(PG_FUNCTION_ARGS)
+citus_get_active_worker_nodes(PG_FUNCTION_ARGS)
 {
     FuncCallContext *functionContext = NULL;
     uint32 workerNodeIndex = 0;
@@ -512,6 +513,16 @@ master_get_active_worker_nodes(PG_FUNCTION_ARGS)
 }


+/*
+ * master_get_active_worker_nodes is a wrapper function for old UDF name.
+ */
+Datum
+master_get_active_worker_nodes(PG_FUNCTION_ARGS)
+{
+    return citus_get_active_worker_nodes(fcinfo);
+}
+
+
 /* Finds the relationId from a potentially qualified relation name. */
 Oid
 ResolveRelationId(text *relationName, bool missingOk)
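The hunks above rename the UDF entry point to `citus_get_active_worker_nodes` and keep `master_get_active_worker_nodes` as a thin wrapper, so both names are expected to return the same set of worker name/port rows. A usage sketch (illustrative only, not part of the diff):

    SELECT * FROM citus_get_active_worker_nodes();
    SELECT * FROM master_get_active_worker_nodes();  -- legacy name, same result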
@@ -16,6 +16,7 @@
 #include "distributed/coordinator_protocol.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/shard_cleaner.h"
+#include "distributed/resource_lock.h"
 #include "distributed/worker_transaction.h"


@@ -23,7 +24,7 @@
 PG_FUNCTION_INFO_V1(master_defer_delete_shards);


-static int DropMarkedShards(void);
+static int DropMarkedShards(bool waitForCleanupLock);


 /*
@@ -44,7 +45,8 @@ master_defer_delete_shards(PG_FUNCTION_ARGS)
     CheckCitusVersion(ERROR);
     EnsureCoordinator();

-    int droppedShardCount = DropMarkedShards();
+    bool waitForCleanupLock = true;
+    int droppedShardCount = DropMarkedShards(waitForCleanupLock);

     PG_RETURN_INT32(droppedShardCount);
 }
@@ -55,14 +57,14 @@ master_defer_delete_shards(PG_FUNCTION_ARGS)
  * any errors to make it safe to use in the maintenance daemon.
  */
 int
-TryDropMarkedShards(void)
+TryDropMarkedShards(bool waitForCleanupLock)
 {
     int droppedShardCount = 0;
     MemoryContext savedContext = CurrentMemoryContext;

     PG_TRY();
     {
-        droppedShardCount = DropMarkedShards();
+        droppedShardCount = DropMarkedShards(waitForCleanupLock);
     }
     PG_CATCH();
     {
@@ -88,9 +90,15 @@ TryDropMarkedShards(void)
  * group and continues with others. The group that has been skipped will be
  * removed at a later time when there are no locks held anymore on those
  * placements.
+ *
+ * Before doing any of this it will take an exclusive PlacementCleanup lock.
+ * This is to ensure that this function is not being run concurrently.
+ * Otherwise really bad race conditions are possible, such as removing all
+ * placements of a shard. waitForCleanupLock indicates if this function should
+ * wait for this lock or returns with a warning.
  */
 static int
-DropMarkedShards(void)
+DropMarkedShards(bool waitForCleanupLock)
 {
     int removedShardCount = 0;
     ListCell *shardPlacementCell = NULL;
@@ -100,6 +108,16 @@ DropMarkedShards(void)
         return removedShardCount;
     }

+    if (waitForCleanupLock)
+    {
+        LockPlacementCleanup();
+    }
+    else if (!TryLockPlacementCleanup())
+    {
+        ereport(WARNING, (errmsg("could not acquire lock to cleanup placements")));
+        return 0;
+    }
+
     List *shardPlacementList = AllShardPlacementsWithShardPlacementState(
         SHARD_STATE_TO_DELETE);
     foreach(shardPlacementCell, shardPlacementList)
@@ -30,7 +30,6 @@
 #include "distributed/connection_management.h"
 #include "distributed/enterprise.h"
 #include "distributed/hash_helpers.h"
-#include "distributed/intermediate_result_pruning.h"
 #include "distributed/listutils.h"
 #include "distributed/coordinator_protocol.h"
 #include "distributed/metadata_cache.h"
@@ -647,12 +646,12 @@ SetupRebalanceMonitor(List *placementUpdateList, Oid relationId)
     List *colocatedUpdateList = GetColocatedRebalanceSteps(placementUpdateList);
     ListCell *colocatedUpdateCell = NULL;

-    ProgressMonitorData *monitor = CreateProgressMonitor(REBALANCE_ACTIVITY_MAGIC_NUMBER,
-                                                         list_length(colocatedUpdateList),
-                                                         sizeof(
-                                                             PlacementUpdateEventProgress),
-                                                         relationId);
-    PlacementUpdateEventProgress *rebalanceSteps = monitor->steps;
+    dsm_handle dsmHandle;
+    ProgressMonitorData *monitor = CreateProgressMonitor(
+        list_length(colocatedUpdateList),
+        sizeof(PlacementUpdateEventProgress),
+        &dsmHandle);
+    PlacementUpdateEventProgress *rebalanceSteps = ProgressMonitorSteps(monitor);

     int32 eventIndex = 0;
     foreach(colocatedUpdateCell, colocatedUpdateList)
@@ -670,6 +669,7 @@ SetupRebalanceMonitor(List *placementUpdateList, Oid relationId)

         eventIndex++;
     }
+    RegisterProgressMonitor(REBALANCE_ACTIVITY_MAGIC_NUMBER, relationId, dsmHandle);
 }


@@ -814,7 +814,7 @@ citus_drain_node(PG_FUNCTION_ARGS)

     char *nodeName = text_to_cstring(nodeNameText);
     int connectionFlag = FORCE_NEW_CONNECTION;
-    MultiConnection *connection = GetNodeConnection(connectionFlag, LOCAL_HOST_NAME,
+    MultiConnection *connection = GetNodeConnection(connectionFlag, LocalHostName,
                                                     PostPortNumber);

     /*
@@ -972,7 +972,6 @@ Datum
 get_rebalance_progress(PG_FUNCTION_ARGS)
 {
     List *segmentList = NIL;
-    ListCell *rebalanceMonitorCell = NULL;
     TupleDesc tupdesc;
     Tuplestorestate *tupstore = SetupTuplestore(fcinfo, &tupdesc);

@@ -980,11 +979,11 @@ get_rebalance_progress(PG_FUNCTION_ARGS)
     List *rebalanceMonitorList = ProgressMonitorList(REBALANCE_ACTIVITY_MAGIC_NUMBER,
                                                      &segmentList);

-    foreach(rebalanceMonitorCell, rebalanceMonitorList)
+    ProgressMonitorData *monitor = NULL;
+    foreach_ptr(monitor, rebalanceMonitorList)
     {
-        ProgressMonitorData *monitor = lfirst(rebalanceMonitorCell);
-        PlacementUpdateEventProgress *placementUpdateEvents = monitor->steps;
+        PlacementUpdateEventProgress *placementUpdateEvents = ProgressMonitorSteps(
+            monitor);

         for (int eventIndex = 0; eventIndex < monitor->stepCount; eventIndex++)
         {
             PlacementUpdateEventProgress *step = placementUpdateEvents + eventIndex;
@@ -1201,7 +1200,7 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent,
                                          sourceNode->workerPort,
                                          REBALANCE_PROGRESS_MOVING);

-    MultiConnection *connection = GetNodeConnection(connectionFlag, LOCAL_HOST_NAME,
+    MultiConnection *connection = GetNodeConnection(connectionFlag, LocalHostName,
                                                     PostPortNumber);

     /*
@@ -2141,9 +2140,9 @@ UpdateColocatedShardPlacementProgress(uint64 shardId, char *sourceName, int sour
 {
     ProgressMonitorData *header = GetCurrentProgressMonitor();

-    if (header != NULL && header->steps != NULL)
+    if (header != NULL)
     {
-        PlacementUpdateEventProgress *steps = header->steps;
+        PlacementUpdateEventProgress *steps = ProgressMonitorSteps(header);
         ListCell *colocatedShardIntervalCell = NULL;

         ShardInterval *shardInterval = LoadShardInterval(shardId);
@@ -32,7 +32,9 @@
 #include "distributed/connection_management.h"
 #include "distributed/deparse_shard_query.h"
 #include "distributed/distributed_planner.h"
+#include "distributed/foreign_key_relationship.h"
 #include "distributed/listutils.h"
+#include "distributed/lock_graph.h"
 #include "distributed/multi_client_executor.h"
 #include "distributed/multi_executor.h"
 #include "distributed/metadata_utility.h"
@@ -65,12 +67,22 @@ static List * RelationShardListForShardCreate(ShardInterval *shardInterval);
 static bool WorkerShardStats(ShardPlacement *placement, Oid relationId,
                              const char *shardName, uint64 *shardSize,
                              text **shardMinValue, text **shardMaxValue);
+static void UpdateTableStatistics(Oid relationId);
+static void ReceiveAndUpdateShardsSizeAndMinMax(List *connectionList);
+static void UpdateShardSizeAndMinMax(uint64 shardId, ShardInterval *shardInterval, Oid
+                                     relationId, List *shardPlacementList, uint64
+                                     shardSize, text *shardMinValue,
+                                     text *shardMaxValue);
+static bool ProcessShardStatisticsRow(PGresult *result, int64 rowIndex, uint64 *shardId,
+                                      text **shardMinValue, text **shardMaxValue,
+                                      uint64 *shardSize);

 /* exports for SQL callable functions */
 PG_FUNCTION_INFO_V1(master_create_empty_shard);
 PG_FUNCTION_INFO_V1(master_append_table_to_shard);
 PG_FUNCTION_INFO_V1(citus_update_shard_statistics);
 PG_FUNCTION_INFO_V1(master_update_shard_statistics);
+PG_FUNCTION_INFO_V1(citus_update_table_statistics);


 /*
@@ -361,6 +373,23 @@ citus_update_shard_statistics(PG_FUNCTION_ARGS)
 }


+/*
+ * citus_update_table_statistics updates metadata (shard size and shard min/max
+ * values) of the shards of the given table
+ */
+Datum
+citus_update_table_statistics(PG_FUNCTION_ARGS)
+{
+    Oid distributedTableId = PG_GETARG_OID(0);
+
+    CheckCitusVersion(ERROR);
+
+    UpdateTableStatistics(distributedTableId);
+
+    PG_RETURN_VOID();
+}
+
+
 /*
  * master_update_shard_statistics is a wrapper function for old UDF name.
  */
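The new `citus_update_table_statistics` UDF above takes the distributed table's OID (a `regclass` when called from SQL) and refreshes shard size and, for append-distributed tables, shard min/max metadata. A usage sketch, assuming a distributed table named `events` exists (hypothetical name, not part of the diff):

    SELECT citus_update_table_statistics('events'::regclass);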
@@ -782,7 +811,6 @@ UpdateShardStatistics(int64 shardId)
 {
     ShardInterval *shardInterval = LoadShardInterval(shardId);
     Oid relationId = shardInterval->relationId;
-    char storageType = shardInterval->storageType;
     bool statsOK = false;
     uint64 shardSize = 0;
     text *minValue = NULL;
@@ -825,17 +853,166 @@ UpdateShardStatistics(int64 shardId)
                           errdetail("Setting shard statistics to NULL")));
     }

-    /* make sure we don't process cancel signals */
-    HOLD_INTERRUPTS();
+    UpdateShardSizeAndMinMax(shardId, shardInterval, relationId, shardPlacementList,
+                             shardSize, minValue, maxValue);
+    return shardSize;
+}

-    /* update metadata for each shard placement we appended to */
+
+/*
+ * UpdateTableStatistics updates metadata (shard size and shard min/max values)
+ * of the shards of the given table. Follows a similar logic to citus_shard_sizes function.
+ */
+static void
+UpdateTableStatistics(Oid relationId)
+{
+    List *citusTableIds = NIL;
+    citusTableIds = lappend_oid(citusTableIds, relationId);
+
+    /* we want to use a distributed transaction here to detect distributed deadlocks */
+    bool useDistributedTransaction = true;
+
+    /* we also want shard min/max values for append distributed tables */
+    bool useShardMinMaxQuery = true;
+
+    List *connectionList = SendShardStatisticsQueriesInParallel(citusTableIds,
+                                                                useDistributedTransaction,
+                                                                useShardMinMaxQuery);
+
+    ReceiveAndUpdateShardsSizeAndMinMax(connectionList);
+}
+
+
+/*
+ * ReceiveAndUpdateShardsSizeAndMinMax receives shard id, size
+ * and min max results from the given connection list, and updates
+ * respective entries in pg_dist_placement and pg_dist_shard
+ */
+static void
+ReceiveAndUpdateShardsSizeAndMinMax(List *connectionList)
+{
+    /*
+     * From the connection list, we will not get all the shards, but
+     * all the placements. We use a hash table to remember already visited shard ids
+     * since we update all the different placements of a shard id at once.
+     */
+    HTAB *alreadyVisitedShardPlacements = CreateOidVisitedHashSet();
+
+    MultiConnection *connection = NULL;
+    foreach_ptr(connection, connectionList)
+    {
+        if (PQstatus(connection->pgConn) != CONNECTION_OK)
+        {
+            continue;
+        }
+
+        bool raiseInterrupts = true;
+        PGresult *result = GetRemoteCommandResult(connection, raiseInterrupts);
+        if (!IsResponseOK(result))
+        {
+            ReportResultError(connection, result, WARNING);
+            continue;
+        }
+
+        int64 rowCount = PQntuples(result);
+        int64 colCount = PQnfields(result);
+
+        /* Although it is not expected */
+        if (colCount != UPDATE_SHARD_STATISTICS_COLUMN_COUNT)
+        {
+            ereport(WARNING, (errmsg("unexpected number of columns from "
+                                     "citus_update_table_statistics")));
+            continue;
+        }
+
+        for (int64 rowIndex = 0; rowIndex < rowCount; rowIndex++)
+        {
+            uint64 shardId = 0;
+            text *shardMinValue = NULL;
+            text *shardMaxValue = NULL;
+            uint64 shardSize = 0;
+
+            if (!ProcessShardStatisticsRow(result, rowIndex, &shardId, &shardMinValue,
+                                           &shardMaxValue, &shardSize))
+            {
+                /* this row has no valid shard statistics */
+                continue;
+            }
+
+            if (OidVisited(alreadyVisitedShardPlacements, shardId))
+            {
+                /* We have already updated this placement list */
+                continue;
+            }
+
+            VisitOid(alreadyVisitedShardPlacements, shardId);
+
+            ShardInterval *shardInterval = LoadShardInterval(shardId);
+            Oid relationId = shardInterval->relationId;
+            List *shardPlacementList = ActiveShardPlacementList(shardId);
+
+            UpdateShardSizeAndMinMax(shardId, shardInterval, relationId,
+                                     shardPlacementList, shardSize, shardMinValue,
+                                     shardMaxValue);
+        }
+        PQclear(result);
+        ForgetResults(connection);
+    }
+    hash_destroy(alreadyVisitedShardPlacements);
+}
+
+
+/*
+ * ProcessShardStatisticsRow processes a row of shard statistics of the input PGresult
+ * - it returns true if this row belongs to a valid shard
+ * - it returns false if this row has no valid shard statistics (shardId = INVALID_SHARD_ID)
+ */
+static bool
+ProcessShardStatisticsRow(PGresult *result, int64 rowIndex, uint64 *shardId,
+                          text **shardMinValue, text **shardMaxValue, uint64 *shardSize)
+{
+    *shardId = ParseIntField(result, rowIndex, 0);
+
+    /* check for the dummy entries we put so that UNION ALL wouldn't complain */
+    if (*shardId == INVALID_SHARD_ID)
+    {
+        /* this row has no valid shard statistics */
+        return false;
+    }
+
+    char *minValueResult = PQgetvalue(result, rowIndex, 1);
+    char *maxValueResult = PQgetvalue(result, rowIndex, 2);
+    *shardMinValue = cstring_to_text(minValueResult);
+    *shardMaxValue = cstring_to_text(maxValueResult);
+    *shardSize = ParseIntField(result, rowIndex, 3);
+    return true;
+}
+
+
+/*
+ * UpdateShardSizeAndMinMax updates the shardlength (shard size) of the given
+ * shard and its placements in pg_dist_placement, and updates the shard min value
+ * and shard max value of the given shard in pg_dist_shard if the relationId belongs
+ * to an append-distributed table
+ */
+static void
+UpdateShardSizeAndMinMax(uint64 shardId, ShardInterval *shardInterval, Oid relationId,
+                         List *shardPlacementList, uint64 shardSize, text *shardMinValue,
+                         text *shardMaxValue)
+{
+    char storageType = shardInterval->storageType;
+
+    ShardPlacement *placement = NULL;
+
+    /* update metadata for each shard placement */
     foreach_ptr(placement, shardPlacementList)
     {
         uint64 placementId = placement->placementId;
         int32 groupId = placement->groupId;

         DeleteShardPlacementRow(placementId);
-        InsertShardPlacementRow(shardId, placementId, SHARD_STATE_ACTIVE, shardSize,
+        InsertShardPlacementRow(shardId, placementId, SHARD_STATE_ACTIVE,
+                                shardSize,
                                 groupId);
     }

@@ -843,18 +1020,9 @@ UpdateShardStatistics(int64 shardId)
     if (IsCitusTableType(relationId, APPEND_DISTRIBUTED))
     {
         DeleteShardRow(shardId);
-        InsertShardRow(relationId, shardId, storageType, minValue, maxValue);
+        InsertShardRow(relationId, shardId, storageType, shardMinValue,
+                       shardMaxValue);
     }

-    if (QueryCancelPending)
-    {
-        ereport(WARNING, (errmsg("cancel requests are ignored during metadata update")));
-        QueryCancelPending = false;
-    }
-
-    RESUME_INTERRUPTS();
-
-    return shardSize;
 }

@@ -38,10 +38,8 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"

-static void UpdateTaskQueryString(Query *query, Oid distributedTableId,
-                                  RangeTblEntry *valuesRTE, Task *task);
-static bool ReplaceRelationConstraintByShardConstraint(List *relationShardList,
-                                                       OnConflictExpr *onConflict);
+static void UpdateTaskQueryString(Query *query, Task *task);
 static RelationShard * FindRelationShard(Oid inputRelationId, List *relationShardList);
 static void ConvertRteToSubqueryWithEmptyResult(RangeTblEntry *rte);
 static bool ShouldLazyDeparseQuery(Task *task);
@@ -57,27 +55,43 @@ RebuildQueryStrings(Job *workerJob)
 {
     Query *originalQuery = workerJob->jobQuery;
     List *taskList = workerJob->taskList;
-    Oid relationId = ((RangeTblEntry *) linitial(originalQuery->rtable))->relid;
-    RangeTblEntry *valuesRTE = ExtractDistributedInsertValuesRTE(originalQuery);

     Task *task = NULL;

+    if (originalQuery->commandType == CMD_INSERT)
+    {
+        AddInsertAliasIfNeeded(originalQuery);
+    }
+
     foreach_ptr(task, taskList)
     {
         Query *query = originalQuery;

-        if (UpdateOrDeleteQuery(query) && list_length(taskList) > 1)
+        /*
+         * Copy the query if there are multiple tasks. If there is a single
+         * task, we scribble on the original query to avoid the copying
+         * overhead.
+         */
+        if (list_length(taskList) > 1)
         {
             query = copyObject(originalQuery);
         }

+        if (UpdateOrDeleteQuery(query))
+        {
+            /*
+             * For UPDATE and DELETE queries, we may have subqueries and joins, so
+             * we use relation shard list to update shard names and call
+             * pg_get_query_def() directly.
+             */
+            List *relationShardList = task->relationShardList;
+            UpdateRelationToShardNames((Node *) query, relationShardList);
+        }
         else if (query->commandType == CMD_INSERT && task->modifyWithSubquery)
         {
             /* for INSERT..SELECT, adjust shard names in SELECT part */
             List *relationShardList = task->relationShardList;
             ShardInterval *shardInterval = LoadShardInterval(task->anchorShardId);

-            query = copyObject(originalQuery);
-
             RangeTblEntry *copiedInsertRte = ExtractResultRelationRTEOrError(query);
             RangeTblEntry *copiedSubqueryRte = ExtractSelectRangeTableEntry(query);
             Query *copiedSubquery = copiedSubqueryRte->subquery;
@ -90,29 +104,18 @@ RebuildQueryStrings(Job *workerJob)
|
||||||
|
|
||||||
ReorderInsertSelectTargetLists(query, copiedInsertRte, copiedSubqueryRte);
|
ReorderInsertSelectTargetLists(query, copiedInsertRte, copiedSubqueryRte);
|
||||||
|
|
||||||
/* setting an alias simplifies deparsing of RETURNING */
|
|
||||||
if (copiedInsertRte->alias == NULL)
|
|
||||||
{
|
|
||||||
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
|
|
||||||
copiedInsertRte->alias = alias;
|
|
||||||
}
|
|
||||||
|
|
||||||
UpdateRelationToShardNames((Node *) copiedSubquery, relationShardList);
|
UpdateRelationToShardNames((Node *) copiedSubquery, relationShardList);
|
||||||
}
|
}
|
||||||
else if (query->commandType == CMD_INSERT && (query->onConflict != NULL ||
|
|
||||||
valuesRTE != NULL))
|
if (query->commandType == CMD_INSERT)
|
||||||
{
|
{
|
||||||
|
RangeTblEntry *modifiedRelationRTE = linitial(originalQuery->rtable);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Always an alias in UPSERTs and multi-row INSERTs to avoid
|
* We store the modified relaiton ID in the task so we can lazily call
|
||||||
* deparsing issues (e.g. RETURNING might reference the original
|
* deparse_shard_query when the string is needed
|
||||||
* table name, which has been replaced by a shard name).
|
|
||||||
*/
|
*/
|
||||||
RangeTblEntry *rangeTableEntry = linitial(query->rtable);
|
task->anchorDistributedTableId = modifiedRelationRTE->relid;
|
||||||
if (rangeTableEntry->alias == NULL)
|
|
||||||
{
|
|
||||||
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
|
|
||||||
rangeTableEntry->alias = alias;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isQueryObjectOrText = GetTaskQueryType(task) == TASK_QUERY_TEXT ||
|
bool isQueryObjectOrText = GetTaskQueryType(task) == TASK_QUERY_TEXT ||
|
||||||
|
@ -122,7 +125,7 @@ RebuildQueryStrings(Job *workerJob)
|
||||||
? "(null)"
|
? "(null)"
|
||||||
: ApplyLogRedaction(TaskQueryString(task)))));
|
: ApplyLogRedaction(TaskQueryString(task)))));
|
||||||
|
|
||||||
UpdateTaskQueryString(query, relationId, valuesRTE, task);
|
UpdateTaskQueryString(query, task);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If parameters were resolved in the job query, then they are now also
|
* If parameters were resolved in the job query, then they are now also
|
||||||
|
@@ -136,54 +139,69 @@ RebuildQueryStrings(Job *workerJob)
 	}
 }
 
 
+/*
+ * AddInsertAliasIfNeeded adds an alias in UPSERTs and multi-row INSERTs to avoid
+ * deparsing issues (e.g. RETURNING might reference the original table name,
+ * which has been replaced by a shard name).
+ */
+void
+AddInsertAliasIfNeeded(Query *query)
+{
+	Assert(query->commandType == CMD_INSERT);
+
+	if (query->onConflict == NULL &&
+		ExtractDistributedInsertValuesRTE(query) == NULL)
+	{
+		/* simple single-row insert does not need an alias */
+		return;
+	}
+
+	RangeTblEntry *rangeTableEntry = linitial(query->rtable);
+	if (rangeTableEntry->alias != NULL)
+	{
+		/* INSERT already has an alias */
+		return;
+	}
+
+	Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
+	rangeTableEntry->alias = alias;
+}
+
+
 /*
  * UpdateTaskQueryString updates the query string stored within the provided
  * Task. If the Task has row values from a multi-row INSERT, those are injected
- * into the provided query (using the provided valuesRTE, which must belong to
- * the query) before deparse occurs (the query's full VALUES list will be
- * restored before this function returns).
+ * into the provided query before deparse occurs (the query's full VALUES list
+ * will be restored before this function returns).
  */
 static void
-UpdateTaskQueryString(Query *query, Oid distributedTableId, RangeTblEntry *valuesRTE,
-		Task *task)
+UpdateTaskQueryString(Query *query, Task *task)
 {
 	List *oldValuesLists = NIL;
+	RangeTblEntry *valuesRTE = NULL;
-
-	if (valuesRTE != NULL)
-	{
-		Assert(valuesRTE->rtekind == RTE_VALUES);
-		Assert(task->rowValuesLists != NULL);
-
-		oldValuesLists = valuesRTE->values_lists;
-		valuesRTE->values_lists = task->rowValuesLists;
-	}
-
-	if (query->commandType != CMD_INSERT)
-	{
-		/*
-		 * For UPDATE and DELETE queries, we may have subqueries and joins, so
-		 * we use relation shard list to update shard names and call
-		 * pg_get_query_def() directly.
-		 */
-		List *relationShardList = task->relationShardList;
-		UpdateRelationToShardNames((Node *) query, relationShardList);
-	}
-	else if (ShouldLazyDeparseQuery(task))
-	{
-		/*
-		 * not all insert queries are copied before calling this
-		 * function, so we do it here
-		 */
-		query = copyObject(query);
-	}
 
 	if (query->commandType == CMD_INSERT)
 	{
-		/*
-		 * We store this in the task so we can lazily call
-		 * deparse_shard_query when the string is needed
-		 */
-		task->anchorDistributedTableId = distributedTableId;
+		/* extract the VALUES from the INSERT */
+		valuesRTE = ExtractDistributedInsertValuesRTE(query);
+
+		if (valuesRTE != NULL)
+		{
+			Assert(valuesRTE->rtekind == RTE_VALUES);
+			Assert(task->rowValuesLists != NULL);
+
+			oldValuesLists = valuesRTE->values_lists;
+			valuesRTE->values_lists = task->rowValuesLists;
+		}
+
+		if (ShouldLazyDeparseQuery(task))
+		{
+			/*
+			 * not all insert queries are copied before calling this
+			 * function, so we do it here
+			 */
+			query = copyObject(query);
+		}
 	}
 
 	SetTaskQueryIfShouldLazyDeparse(task, query);
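Editorial note: a minimal sketch of how a caller might use the new helper before deparsing a multi-row INSERT against a shard. Only AddInsertAliasIfNeeded, CITUS_TABLE_ALIAS and deparse_shard_query come from the diff above; the wrapper function and its name are illustrative assumptions, not part of the change.

```c
/* Sketch only: assumes a Query *insertQuery built by the caller. */
static void
DeparseInsertForShard(Query *insertQuery, Oid relationId, int64 shardId,
					  StringInfo out)
{
	/*
	 * Rewrite references such as "RETURNING my_table.*" to use the stable
	 * citus_table_alias instead of the (shard-specific) relation name.
	 */
	AddInsertAliasIfNeeded(insertQuery);

	/* deparse against the shard; the alias keeps RETURNING / ON CONFLICT valid */
	deparse_shard_query(insertQuery, relationId, shardId, out);
}
```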
@@ -266,124 +284,6 @@ UpdateRelationToShardNames(Node *node, List *relationShardList)
 }
 
 
-/*
- * UpdateRelationsToLocalShardTables walks over the query tree and appends shard ids to
- * relations. The caller is responsible for ensuring that the resulting Query can
- * be executed locally.
- */
-bool
-UpdateRelationsToLocalShardTables(Node *node, List *relationShardList)
-{
-	if (node == NULL)
-	{
-		return false;
-	}
-
-	/* want to look at all RTEs, even in subqueries, CTEs and such */
-	if (IsA(node, Query))
-	{
-		return query_tree_walker((Query *) node, UpdateRelationsToLocalShardTables,
-				relationShardList, QTW_EXAMINE_RTES_BEFORE);
-	}
-
-	if (IsA(node, OnConflictExpr))
-	{
-		OnConflictExpr *onConflict = (OnConflictExpr *) node;
-
-		return ReplaceRelationConstraintByShardConstraint(relationShardList, onConflict);
-	}
-
-	if (!IsA(node, RangeTblEntry))
-	{
-		return expression_tree_walker(node, UpdateRelationsToLocalShardTables,
-				relationShardList);
-	}
-
-	RangeTblEntry *newRte = (RangeTblEntry *) node;
-
-	if (newRte->rtekind != RTE_RELATION)
-	{
-		return false;
-	}
-
-	RelationShard *relationShard = FindRelationShard(newRte->relid,
-			relationShardList);
-
-	/* the function should only be called with local shards */
-	if (relationShard == NULL)
-	{
-		return true;
-	}
-
-	Oid shardOid = GetTableLocalShardOid(relationShard->relationId,
-			relationShard->shardId);
-
-	newRte->relid = shardOid;
-
-	return false;
-}
-
-
-/*
- * ReplaceRelationConstraintByShardConstraint replaces given OnConflictExpr's
- * constraint id with constraint id of the corresponding shard.
- */
-static bool
-ReplaceRelationConstraintByShardConstraint(List *relationShardList,
-		OnConflictExpr *onConflict)
-{
-	Oid constraintId = onConflict->constraint;
-
-	if (!OidIsValid(constraintId))
-	{
-		return false;
-	}
-
-	Oid constraintRelationId = InvalidOid;
-
-	HeapTuple heapTuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintId));
-	if (HeapTupleIsValid(heapTuple))
-	{
-		Form_pg_constraint contup = (Form_pg_constraint) GETSTRUCT(heapTuple);
-
-		constraintRelationId = contup->conrelid;
-		ReleaseSysCache(heapTuple);
-	}
-
-	/*
-	 * We can return here without calling the walker function, since we know there
-	 * will be no possible tables or constraints after this point, by the syntax.
-	 */
-	if (!OidIsValid(constraintRelationId))
-	{
-		ereport(ERROR, (errmsg("Invalid relation id (%u) for constraint: %s",
-				constraintRelationId, get_constraint_name(constraintId))));
-	}
-
-	RelationShard *relationShard = FindRelationShard(constraintRelationId,
-			relationShardList);
-
-	if (relationShard != NULL)
-	{
-		char *constraintName = get_constraint_name(constraintId);
-
-		AppendShardIdToName(&constraintName, relationShard->shardId);
-
-		Oid shardOid = GetTableLocalShardOid(relationShard->relationId,
-				relationShard->shardId);
-
-		Oid shardConstraintId = get_relation_constraint_oid(shardOid, constraintName,
-				false);
-
-		onConflict->constraint = shardConstraintId;
-
-		return false;
-	}
-
-	return true;
-}
-
-
 /*
  * FindRelationShard finds the RelationShard for shard relation with
  * given Oid if exists in given relationShardList. Otherwise, returns NULL.
@@ -49,6 +49,7 @@
 #include "executor/executor.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "nodes/pg_list.h"
 #include "parser/parsetree.h"
 #include "parser/parse_type.h"
 #if PG_VERSION_NUM >= PG_VERSION_12
@@ -98,6 +99,7 @@ static PlannedStmt * FinalizeNonRouterPlan(PlannedStmt *localPlan,
 		DistributedPlan *distributedPlan,
 		CustomScan *customScan);
 static PlannedStmt * FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan);
+static AppendRelInfo * FindTargetAppendRelInfo(PlannerInfo *root, int relationRteIndex);
 static List * makeTargetListFromCustomScanList(List *custom_scan_tlist);
 static List * makeCustomScanTargetlistFromExistingTargetList(List *existingTargetlist);
 static int32 BlessRecordExpressionList(List *exprs);
@@ -124,6 +126,7 @@ static PlannedStmt * PlanFastPathDistributedStmt(DistributedPlanningContext *planContext,
 static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext,
 		int rteIdCounter);
 static RTEListProperties * GetRTEListProperties(List *rangeTableList);
+static List * TranslatedVars(PlannerInfo *root, int relationIndex);
 
 
 /* Distributed planner hook */
@@ -165,30 +168,29 @@ distributed_planner(Query *parse,
 		.boundParams = boundParams,
 	};
 
-	if (fastPathRouterQuery)
-	{
-		/*
-		 * We need to copy the parse tree because the FastPathPlanner modifies
-		 * it. In the next branch we do the same for other distributed queries
-		 * too, but for those it needs to be done AFTER calling
-		 * AssignRTEIdentities.
-		 */
-		planContext.originalQuery = copyObject(parse);
-	}
-	else if (needsDistributedPlanning)
+	if (needsDistributedPlanning)
 	{
 		/*
 		 * standard_planner scribbles on it's input, but for deparsing we need the
-		 * unmodified form. Note that before copying we call
-		 * AssignRTEIdentities, which is needed because these identities need
-		 * to be present in the copied query too.
+		 * unmodified form. Before copying we call AssignRTEIdentities to be able
+		 * to match RTEs in the rewritten query tree with those in the original
+		 * tree.
 		 */
 		rteIdCounter = AssignRTEIdentities(rangeTableList, rteIdCounter);
 
 		planContext.originalQuery = copyObject(parse);
 
-		bool setPartitionedTablesInherited = false;
-		AdjustPartitioningForDistributedPlanning(rangeTableList,
-				setPartitionedTablesInherited);
+		/*
+		 * When there are partitioned tables (not applicable to fast path),
+		 * pretend that they are regular tables to avoid unnecessary work
+		 * in standard_planner.
+		 */
+		if (!fastPathRouterQuery)
+		{
+			bool setPartitionedTablesInherited = false;
+			AdjustPartitioningForDistributedPlanning(rangeTableList,
+					setPartitionedTablesInherited);
+		}
 	}
 
 	/*
@@ -447,7 +449,7 @@ AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier)
 {
 	Assert(rangeTableEntry->rtekind == RTE_RELATION);
 
-	rangeTableEntry->values_lists = list_make1_int(rteIdentifier);
+	rangeTableEntry->values_lists = list_make2_int(rteIdentifier, rangeTableEntry->inh);
 }
@@ -458,12 +460,24 @@ GetRTEIdentity(RangeTblEntry *rte)
 	Assert(rte->rtekind == RTE_RELATION);
 	Assert(rte->values_lists != NIL);
 	Assert(IsA(rte->values_lists, IntList));
-	Assert(list_length(rte->values_lists) == 1);
+	Assert(list_length(rte->values_lists) == 2);
 
 	return linitial_int(rte->values_lists);
 }
 
 
+/*
+ * GetOriginalInh gets the original value of the inheritance flag set by
+ * AssignRTEIdentity. The planner resets this flag in the rewritten query,
+ * but we need it during deparsing.
+ */
+bool
+GetOriginalInh(RangeTblEntry *rte)
+{
+	return lsecond_int(rte->values_lists);
+}
+
+
 /*
  * GetQueryLockMode returns the necessary lock mode to be acquired for the
  * given query. (See comment written in RangeTblEntry->rellockmode)
@@ -1814,6 +1828,8 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
 
 	/* see comments on GetVarFromAssignedParam() */
 	relationRestriction->outerPlanParamsList = OuterPlanParamsList(root);
+	relationRestriction->translatedVars = TranslatedVars(root,
+			relationRestriction->index);
 
 	RelationRestrictionContext *relationRestrictionContext =
 		plannerRestrictionContext->relationRestrictionContext;
@@ -1837,6 +1853,61 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
 }
 
 
+/*
+ * TranslatedVars deep copies the translated vars for the given relation index
+ * if there is any append rel list.
+ */
+static List *
+TranslatedVars(PlannerInfo *root, int relationIndex)
+{
+	List *translatedVars = NIL;
+
+	if (root->append_rel_list != NIL)
+	{
+		AppendRelInfo *targetAppendRelInfo =
+			FindTargetAppendRelInfo(root, relationIndex);
+		if (targetAppendRelInfo != NULL)
+		{
+			/* postgres deletes translated_vars after pg13, hence we deep copy them here */
+			Node *targetNode = NULL;
+			foreach_ptr(targetNode, targetAppendRelInfo->translated_vars)
+			{
+				translatedVars =
+					lappend(translatedVars, copyObject(targetNode));
+			}
+		}
+	}
+	return translatedVars;
+}
+
+
+/*
+ * FindTargetAppendRelInfo finds the target append rel info for the given
+ * relation rte index.
+ */
+static AppendRelInfo *
+FindTargetAppendRelInfo(PlannerInfo *root, int relationRteIndex)
+{
+	AppendRelInfo *appendRelInfo = NULL;
+
+	/* iterate on the queries that are part of UNION ALL subselects */
+	foreach_ptr(appendRelInfo, root->append_rel_list)
+	{
+		/*
+		 * We're only interested in the child rel that is equal to the
+		 * relation we're investigating. Here we don't need to find the offset
+		 * because postgres adds an offset to child_relid and parent_relid after
+		 * calling multi_relation_restriction_hook.
+		 */
+		if (appendRelInfo->child_relid == relationRteIndex)
+		{
+			return appendRelInfo;
+		}
+	}
+	return NULL;
+}
+
+
 /*
  * AdjustReadIntermediateResultCost adjusts the row count and total cost
  * of a read_intermediate_result call based on the file size.
@@ -2143,6 +2214,33 @@ CreateAndPushPlannerRestrictionContext(void)
 }
 
 
+/*
+ * TranslatedVarsForRteIdentity gets an rteIdentity and returns the
+ * translatedVars that belong to the range table relation. If no
+ * translatedVars found, the function returns NIL;
+ */
+List *
+TranslatedVarsForRteIdentity(int rteIdentity)
+{
+	PlannerRestrictionContext *currentPlannerRestrictionContext =
+		CurrentPlannerRestrictionContext();
+
+	List *relationRestrictionList =
+		currentPlannerRestrictionContext->relationRestrictionContext->
+		relationRestrictionList;
+	RelationRestriction *relationRestriction = NULL;
+	foreach_ptr(relationRestriction, relationRestrictionList)
+	{
+		if (GetRTEIdentity(relationRestriction->rte) == rteIdentity)
+		{
+			return relationRestriction->translatedVars;
+		}
+	}
+
+	return NIL;
+}
+
+
 /*
  * CurrentRestrictionContext returns the most recently added
  * PlannerRestrictionContext from the plannerRestrictionContextList list.
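Editorial note: the identity/inheritance packing used by AssignRTEIdentity, GetRTEIdentity and GetOriginalInh above can be read as the following minimal sketch. The list helpers (list_make2_int, linitial_int, lsecond_int) are the stock PostgreSQL pg_list.h functions; the wrapper names below are illustrative only.

```c
/* Sketch: how the two slots of values_lists are used for RTE_RELATION entries. */
static void
StashIdentityAndInh(RangeTblEntry *rte, int rteIdentifier)
{
	/* slot 1: the Citus-assigned RTE identity, slot 2: the original inh flag */
	rte->values_lists = list_make2_int(rteIdentifier, rte->inh);
}

static int
ReadIdentity(RangeTblEntry *rte)
{
	return linitial_int(rte->values_lists);   /* what GetRTEIdentity() returns */
}

static bool
ReadOriginalInh(RangeTblEntry *rte)
{
	return lsecond_int(rte->values_lists);    /* what GetOriginalInh() returns */
}
```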
@@ -16,7 +16,9 @@
 #include "distributed/local_plan_cache.h"
 #include "distributed/deparse_shard_query.h"
 #include "distributed/citus_ruleutils.h"
+#include "distributed/insert_select_planner.h"
 #include "distributed/metadata_cache.h"
+#include "distributed/multi_executor.h"
 #include "distributed/version_compat.h"
 #if PG_VERSION_NUM >= PG_VERSION_12
 #include "optimizer/optimizer.h"
@@ -26,13 +28,21 @@
 #include "optimizer/clauses.h"
 
 
+static Query * GetLocalShardQueryForCache(Query *jobQuery, Task *task,
+		ParamListInfo paramListInfo);
+static char * DeparseLocalShardQuery(Query *jobQuery, List *relationShardList,
+		Oid anchorDistributedTableId, int64 anchorShardId);
+static int ExtractParameterTypesForParamListInfo(ParamListInfo originalParamListInfo,
+		Oid **parameterTypes);
+
 /*
  * CacheLocalPlanForShardQuery replaces the relation OIDs in the job query
  * with shard relation OIDs and then plans the query and caches the result
  * in the originalDistributedPlan (which may be preserved across executions).
  */
 void
-CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan)
+CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan,
+		ParamListInfo paramListInfo)
 {
 	PlannedStmt *localPlan = GetCachedLocalPlan(task, originalDistributedPlan);
 	if (localPlan != NULL)
@@ -58,14 +68,14 @@ CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan
 	 * We prefer to use jobQuery (over task->query) because we don't want any
 	 * functions/params to have been evaluated in the cached plan.
 	 */
-	Query *shardQuery = copyObject(originalDistributedPlan->workerJob->jobQuery);
+	Query *jobQuery = copyObject(originalDistributedPlan->workerJob->jobQuery);
 
-	UpdateRelationsToLocalShardTables((Node *) shardQuery, task->relationShardList);
+	Query *localShardQuery = GetLocalShardQueryForCache(jobQuery, task, paramListInfo);
 
-	LOCKMODE lockMode = GetQueryLockMode(shardQuery);
+	LOCKMODE lockMode = GetQueryLockMode(localShardQuery);
 
 	/* fast path queries can only have a single RTE by definition */
-	RangeTblEntry *rangeTableEntry = (RangeTblEntry *) linitial(shardQuery->rtable);
+	RangeTblEntry *rangeTableEntry = (RangeTblEntry *) linitial(localShardQuery->rtable);
 
 	/*
 	 * If the shard has been created in this transction, we wouldn't see the relationId
@@ -73,24 +83,16 @@ CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan
 	 */
 	if (rangeTableEntry->relid == InvalidOid)
 	{
-		pfree(shardQuery);
+		pfree(jobQuery);
+		pfree(localShardQuery);
 		MemoryContextSwitchTo(oldContext);
 		return;
 	}
 
-	if (IsLoggableLevel(DEBUG5))
-	{
-		StringInfo queryString = makeStringInfo();
-		pg_get_query_def(shardQuery, queryString);
-
-		ereport(DEBUG5, (errmsg("caching plan for query: %s",
-				queryString->data)));
-	}
-
 	LockRelationOid(rangeTableEntry->relid, lockMode);
 
 	LocalPlannedStatement *localPlannedStatement = CitusMakeNode(LocalPlannedStatement);
-	localPlan = planner_compat(shardQuery, 0, NULL);
+	localPlan = planner_compat(localShardQuery, 0, NULL);
 	localPlannedStatement->localPlan = localPlan;
 	localPlannedStatement->shardId = task->anchorShardId;
 	localPlannedStatement->localGroupId = GetLocalGroupId();
@@ -103,6 +105,130 @@ CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan
 }
 
 
+/*
+ * GetLocalShardQueryForCache is a helper function which generates
+ * the local shard query based on the jobQuery. The function should
+ * not be used for generic purposes, it is specialized for local cached
+ * queries.
+ *
+ * It is not guaranteed to have consistent attribute numbers on the shards
+ * and on the shell (e.g., distributed/reference tables) due to DROP COLUMN
+ * commands.
+ *
+ * To avoid any edge cases due to such discrepancies, we first deparse the
+ * jobQuery with the tables replaced to shards, and parse the query string
+ * back. This is normally a very expensive operation, however we only do it
+ * once per cached local plan, which is acceptable.
+ */
+static Query *
+GetLocalShardQueryForCache(Query *jobQuery, Task *task, ParamListInfo orig_paramListInfo)
+{
+	char *shardQueryString =
+		DeparseLocalShardQuery(jobQuery, task->relationShardList,
+				task->anchorDistributedTableId,
+				task->anchorShardId);
+	ereport(DEBUG5, (errmsg("Local shard query that is going to be cached: %s",
+			shardQueryString)));
+
+	Oid *parameterTypes = NULL;
+	int numberOfParameters =
+		ExtractParameterTypesForParamListInfo(orig_paramListInfo, &parameterTypes);
+
+	Query *localShardQuery =
+		ParseQueryString(shardQueryString, parameterTypes, numberOfParameters);
+
+	return localShardQuery;
+}
+
+
+/*
+ * DeparseLocalShardQuery is a helper function to deparse given jobQuery for the shard(s)
+ * identified by the relationShardList, anchorDistributedTableId and anchorShardId.
+ *
+ * For the details and comparison with TaskQueryString(), see the comments in the function.
+ */
+static char *
+DeparseLocalShardQuery(Query *jobQuery, List *relationShardList, Oid
+		anchorDistributedTableId, int64 anchorShardId)
+{
+	StringInfo queryString = makeStringInfo();
+
+	/*
+	 * We imitate what TaskQueryString() does, but we cannot rely on that function
+	 * as the parameters might have been already resolved on the QueryTree in the
+	 * task. Instead, we operate on the jobQuery where are sure that the
+	 * coordination evaluation has not happened.
+	 *
+	 * Local shard queries are only applicable for local cached query execution.
+	 * In the local cached query execution mode, we can use a query structure
+	 * (or query string) with unevaluated expressions as we allow function calls
+	 * to be evaluated when the query on the shard is executed (e.g., do no have
+	 * coordinator evaluation, instead let Postgres executor evaluate values).
+	 *
+	 * Additionally, we can allow them to be evaluated again because they are stable,
+	 * and we do not cache plans / use unevaluated query strings for queries containing
+	 * volatile functions.
+	 */
+	if (jobQuery->commandType == CMD_INSERT)
+	{
+		/*
+		 * We currently do not support INSERT .. SELECT here. To support INSERT..SELECT
+		 * queries, we should update the relation names to shard names in the SELECT
+		 * clause (e.g., UpdateRelationToShardNames()).
+		 */
+		Assert(!CheckInsertSelectQuery(jobQuery));
+
+		AddInsertAliasIfNeeded(jobQuery);
+
+		/*
+		 * For INSERT queries we cannot use pg_get_query_def. Mainly because we
+		 * cannot run UpdateRelationToShardNames on an INSERT query. This is
+		 * because the PG deparsing logic fails when trying to insert into a
+		 * RTE_FUNCTION (which is what will happen if you call
+		 * UpdateRelationToShardNames).
+		 */
+		deparse_shard_query(jobQuery, anchorDistributedTableId, anchorShardId,
+				queryString);
+	}
+	else
+	{
+		UpdateRelationToShardNames((Node *) jobQuery, relationShardList);
+
+		pg_get_query_def(jobQuery, queryString);
+	}
+
+	return queryString->data;
+}
+
+
+/*
+ * ExtractParameterTypesForParamListInfo is a helper function which helps to
+ * extract the parameter types of the given ParamListInfo via the second
+ * parameter of the function.
+ *
+ * The function also returns the number of parameters. If no parameter exists,
+ * the function returns 0.
+ */
+static int
+ExtractParameterTypesForParamListInfo(ParamListInfo originalParamListInfo,
+		Oid **parameterTypes)
+{
+	*parameterTypes = NULL;
+
+	int numberOfParameters = 0;
+	if (originalParamListInfo != NULL)
+	{
+		const char **parameterValues = NULL;
+		ParamListInfo paramListInfo = copyParamList(originalParamListInfo);
+		ExtractParametersForLocalExecution(paramListInfo, parameterTypes,
+				&parameterValues);
+		numberOfParameters = paramListInfo->numParams;
+	}
+
+	return numberOfParameters;
+}
+
+
 /*
  * GetCachedLocalPlan is a helper function which return the cached
  * plan in the distributedPlan for the given task if exists.
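Editorial note: a condensed sketch of the call flow the new helpers implement. DeparseLocalShardQuery, ExtractParameterTypesForParamListInfo, ParseQueryString and planner_compat are the calls shown in the diff; the wrapper below folds them into one illustrative function and is an assumption, not part of the change.

```c
/*
 * Sketch: deparse the job query against the shard, parse the string back, and
 * plan it, so the cached local plan uses the shard's own attribute numbers
 * (which can differ from the shell table's after DROP COLUMN).
 */
static PlannedStmt *
PlanShardQueryForCache(Query *jobQuery, Task *task, ParamListInfo params)
{
	char *queryString = DeparseLocalShardQuery(jobQuery, task->relationShardList,
			task->anchorDistributedTableId, task->anchorShardId);

	Oid *paramTypes = NULL;
	int paramCount = ExtractParameterTypesForParamListInfo(params, &paramTypes);

	/* re-parse so attribute numbers match the shard, not the shell table */
	Query *shardQuery = ParseQueryString(queryString, paramTypes, paramCount);

	return planner_compat(shardQuery, 0, NULL);
}
```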
@@ -322,10 +322,6 @@ static Node * WorkerLimitCount(Node *limitCount, Node *limitOffset, OrderByLimit
 static List * WorkerSortClauseList(Node *limitCount,
 		List *groupClauseList, List *sortClauseList,
 		OrderByLimitReference orderByLimitReference);
-static List * GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
-		List *sortClauseList,
-		AttrNumber *targetProjectionNumber,
-		Index *nextSortGroupRefIndex);
 static bool CanPushDownLimitApproximate(List *sortClauseList, List *targetList);
 static bool HasOrderByAggregate(List *sortClauseList, List *targetList);
 static bool HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList);
@@ -1624,7 +1620,19 @@ MasterAggregateExpression(Aggref *originalAggregate,
 		Expr *directarg;
 		foreach_ptr(directarg, originalAggregate->aggdirectargs)
 		{
-			if (!IsA(directarg, Const) && !IsA(directarg, Param))
+			/*
+			 * Need to replace nodes that contain any Vars with Vars referring
+			 * to the related column of the result set returned for the worker
+			 * aggregation.
+			 *
+			 * When there are no Vars, then the expression can be fully evaluated
+			 * on the coordinator, so we skip it here. This is not just an
+			 * optimization, but the result of the expression might require
+			 * calling the final function of the aggregate, and doing so when
+			 * there are no input rows (i.e.: with an empty tuple slot) is not
+			 * desirable for the node-executor methods.
+			 */
+			if (pull_var_clause_default((Node *) directarg) != NIL)
 			{
 				Var *var = makeVar(masterTableId, walkerContext->columnId,
 						exprType((Node *) directarg),
@@ -2705,38 +2713,6 @@ ProcessWindowFunctionsForWorkerQuery(List *windowClauseList,
 		return;
 	}
 
-	WindowClause *windowClause = NULL;
-	foreach_ptr(windowClause, windowClauseList)
-	{
-		List *partitionClauseTargetList =
-			GenerateNewTargetEntriesForSortClauses(originalTargetEntryList,
-					windowClause->partitionClause,
-					&(queryTargetList->targetProjectionNumber),
-					queryWindowClause->nextSortGroupRefIndex);
-		List *orderClauseTargetList =
-			GenerateNewTargetEntriesForSortClauses(originalTargetEntryList,
-					windowClause->orderClause,
-					&(queryTargetList->targetProjectionNumber),
-					queryWindowClause->nextSortGroupRefIndex);
-
-		/*
-		 * Note that even Citus does push down the window clauses as-is, we may still need to
-		 * add the generated entries to the target list. The reason is that the same aggregates
-		 * might be referred from another target entry that is a bare aggregate (e.g., no window
-		 * functions), which would have been mutated. For instance, when an average aggregate
-		 * is mutated on the target list, the window function would refer to a sum aggregate,
-		 * which is obviously wrong.
-		 */
-		queryTargetList->targetEntryList = list_concat(queryTargetList->targetEntryList,
-				partitionClauseTargetList);
-		queryTargetList->targetEntryList = list_concat(queryTargetList->targetEntryList,
-				orderClauseTargetList);
-	}
-
 	queryWindowClause->workerWindowClauseList = windowClauseList;
 	queryWindowClause->hasWindowFunctions = true;
 }
@@ -2802,19 +2778,6 @@ ProcessLimitOrderByForWorkerQuery(OrderByLimitReference orderByLimitReference,
 		groupClauseList,
 		sortClauseList,
 		orderByLimitReference);
-
-	/*
-	 * TODO: Do we really need to add the target entries if we're not pushing
-	 * down ORDER BY?
-	 */
-	List *newTargetEntryListForSortClauses =
-		GenerateNewTargetEntriesForSortClauses(originalTargetList,
-				queryOrderByLimit->workerSortClauseList,
-				&(queryTargetList->targetProjectionNumber),
-				queryOrderByLimit->nextSortGroupRefIndex);
-
-	queryTargetList->targetEntryList =
-		list_concat(queryTargetList->targetEntryList, newTargetEntryListForSortClauses);
 }
@@ -3100,7 +3063,13 @@ WorkerAggregateExpressionList(Aggref *originalAggregate,
 		Expr *directarg;
 		foreach_ptr(directarg, originalAggregate->aggdirectargs)
 		{
-			if (!IsA(directarg, Const) && !IsA(directarg, Param))
+			/*
+			 * The worker aggregation should execute any node that contains any
+			 * Var nodes and return the result in the targetlist, so that the
+			 * combine query can then fetch the result via remote scan; see
+			 * MasterAggregateExpression.
+			 */
+			if (pull_var_clause_default((Node *) directarg) != NIL)
 			{
 				workerAggregateList = lappend(workerAggregateList, directarg);
 			}
@@ -4803,87 +4772,6 @@ WorkerSortClauseList(Node *limitCount, List *groupClauseList, List *sortClauseList,
 }
 
 
-/*
- * GenerateNewTargetEntriesForSortClauses goes over provided sort clause lists and
- * creates new target entries if needed to make sure sort clauses has correct
- * references. The function returns list of new target entries, caller is
- * responsible to add those target entries to the end of worker target list.
- *
- * The function is required because we change the target entry if it contains an
- * expression having an aggregate operation, or just the AVG aggregate.
- * Afterwards any order by clause referring to original target entry starts
- * to point to a wrong expression.
- *
- * Note the function modifies SortGroupClause items in sortClauseList,
- * targetProjectionNumber, and nextSortGroupRefIndex.
- */
-static List *
-GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
-		List *sortClauseList,
-		AttrNumber *targetProjectionNumber,
-		Index *nextSortGroupRefIndex)
-{
-	List *createdTargetList = NIL;
-
-	SortGroupClause *sgClause = NULL;
-	foreach_ptr(sgClause, sortClauseList)
-	{
-		TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList);
-		Expr *targetExpr = targetEntry->expr;
-		bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0);
-		bool createNewTargetEntry = false;
-
-		/* we are only interested in target entries containing aggregates */
-		if (!containsAggregate)
-		{
-			continue;
-		}
-
-		/*
-		 * If the target expression is not an Aggref, it is either an expression
-		 * on a single aggregate, or expression containing multiple aggregates.
-		 * Worker query mutates these target entries to have a naked target entry
-		 * per aggregate function. We want to use original target entries if this
-		 * the case.
-		 * If the original target expression is an avg aggref, we also want to use
-		 * original target entry.
-		 */
-		if (!IsA(targetExpr, Aggref))
-		{
-			createNewTargetEntry = true;
-		}
-		else
-		{
-			Aggref *aggNode = (Aggref *) targetExpr;
-			AggregateType aggregateType = GetAggregateType(aggNode);
-			if (aggregateType == AGGREGATE_AVERAGE)
-			{
-				createNewTargetEntry = true;
-			}
-		}
-
-		if (createNewTargetEntry)
-		{
-			bool resJunk = true;
-			AttrNumber nextResNo = (*targetProjectionNumber);
-			Expr *newExpr = copyObject(targetExpr);
-			TargetEntry *newTargetEntry = makeTargetEntry(newExpr, nextResNo,
-					targetEntry->resname, resJunk);
-			newTargetEntry->ressortgroupref = *nextSortGroupRefIndex;
-
-			createdTargetList = lappend(createdTargetList, newTargetEntry);
-
-			sgClause->tleSortGroupRef = *nextSortGroupRefIndex;
-
-			(*nextSortGroupRefIndex)++;
-			(*targetProjectionNumber)++;
-		}
-	}
-
-	return createdTargetList;
-}
-
-
 /*
  * CanPushDownLimitApproximate checks if we can push down the limit clause to
  * the worker nodes, and get approximate and meaningful results. We can do this
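Editorial note: the switch above from the Const/Param check to a Var-based check can be read as the following sketch. pull_var_clause_default is the helper already used in the diff; the wrapper function and its name are illustrative assumptions.

```c
/* Sketch: decide where an ordered-set aggregate's direct argument is evaluated. */
static bool
DirectArgNeedsWorkerEvaluation(Node *directarg)
{
	/*
	 * If the expression references any column (Var), the worker has to compute
	 * it and ship the value back in its target list; otherwise the coordinator
	 * can evaluate it locally without touching worker rows.
	 */
	return pull_var_clause_default(directarg) != NIL;
}
```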
@@ -1534,6 +1534,7 @@ MultiTableNodeList(List *tableEntryList, List *rangeTableList)
 		tableNode->partitionColumn = partitionColumn;
 		tableNode->alias = rangeTableEntry->alias;
 		tableNode->referenceNames = rangeTableEntry->eref;
+		tableNode->includePartitions = GetOriginalInh(rangeTableEntry);
 
 		tableNodeList = lappend(tableNodeList, tableNode);
 	}
@@ -45,6 +45,7 @@
 #include "distributed/multi_join_order.h"
 #include "distributed/multi_logical_optimizer.h"
 #include "distributed/multi_logical_planner.h"
+#include "distributed/multi_partitioning_utils.h"
 #include "distributed/multi_physical_planner.h"
 #include "distributed/log_utils.h"
 #include "distributed/pg_dist_partition.h"
@@ -743,6 +744,8 @@ BaseRangeTableList(MultiNode *multiNode)
 			rangeTableEntry->eref = multiTable->referenceNames;
 			rangeTableEntry->alias = multiTable->alias;
 			rangeTableEntry->relid = multiTable->relationId;
+			rangeTableEntry->inh = multiTable->includePartitions;
+
 			SetRangeTblExtraData(rangeTableEntry, CITUS_RTE_RELATION, NULL, NULL,
 					list_make1_int(multiTable->rangeTableId),
 					NIL, NIL, NIL, NIL);
@@ -824,7 +827,21 @@ static List *
 QueryTargetList(MultiNode *multiNode)
 {
 	List *projectNodeList = FindNodesOfType(multiNode, T_MultiProject);
-	Assert(list_length(projectNodeList) > 0);
+	if (list_length(projectNodeList) == 0)
+	{
+		/*
+		 * The physical planner assumes that all worker queries would have
+		 * target list entries based on the fact that at least the column
+		 * on the JOINs have to be on the target list. However, there is
+		 * an exception to that if there is a cartesian product join and
+		 * there is no additional target list entries belong to one side
+		 * of the JOIN. Once we support cartesian product join, we should
+		 * remove this error.
+		 */
+		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				errmsg("cannot perform distributed planning on this query"),
+				errdetail("Cartesian products are currently unsupported")));
+	}
 
 	MultiProject *topProjectNode = (MultiProject *) linitial(projectNodeList);
 	List *columnList = topProjectNode->columnList;
@@ -1454,6 +1471,7 @@ ConstructCallingRTE(RangeTblEntry *rangeTableEntry, List *dependentJobList)
 		callingRTE->rtekind = RTE_RELATION;
 		callingRTE->eref = rangeTableEntry->eref;
 		callingRTE->relid = rangeTableEntry->relid;
+		callingRTE->inh = rangeTableEntry->inh;
 	}
 	else if (rangeTableKind == CITUS_RTE_REMOTE_QUERY)
 	{
@@ -4352,16 +4370,8 @@ FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment)
 		Oid relationId = rangeTableEntry->relid;
 		char *relationName = get_rel_name(relationId);
 
-		/*
-		 * If the table is not in the default namespace (public), we include it in
-		 * the fragment alias.
-		 */
 		Oid schemaId = get_rel_namespace(relationId);
 		schemaName = get_namespace_name(schemaId);
-		if (strncmp(schemaName, "public", NAMEDATALEN) == 0)
-		{
-			schemaName = NULL;
-		}
 
 		aliasName = relationName;
@@ -555,6 +555,14 @@ ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery,
 	{
 		ListCell *cteCell = NULL;
 
+		/* CTEs still not supported for INSERTs. */
+		if (queryTree->commandType == CMD_INSERT)
+		{
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+					"Router planner doesn't support common table expressions with INSERT queries.",
+					NULL, NULL);
+		}
+
 		foreach(cteCell, queryTree->cteList)
 		{
 			CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell);
@@ -562,31 +570,22 @@ ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery,
 
 			if (cteQuery->commandType != CMD_SELECT)
 			{
-				/* Modifying CTEs still not supported for INSERTs & multi shard queries. */
-				if (queryTree->commandType == CMD_INSERT)
-				{
-					return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-							"Router planner doesn't support non-select common table expressions with non-select queries.",
-							NULL, NULL);
-				}
-
+				/* Modifying CTEs still not supported for multi shard queries. */
 				if (multiShardQuery)
 				{
 					return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
 							"Router planner doesn't support non-select common table expressions with multi shard queries.",
 							NULL, NULL);
 				}
+				/* Modifying CTEs exclude both INSERT CTEs & INSERT queries. */
+				else if (cteQuery->commandType == CMD_INSERT)
+				{
+					return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							"Router planner doesn't support INSERT common table expressions.",
+							NULL, NULL);
+				}
 			}
 
-			/* Modifying CTEs exclude both INSERT CTEs & INSERT queries. */
-			if (cteQuery->commandType == CMD_INSERT)
-			{
-				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-						"Router planner doesn't support INSERT common table expressions.",
-						NULL, NULL);
-			}
-
-
 			if (cteQuery->hasForUpdate &&
 				FindNodeMatchingCheckFunctionInRangeTableList(cteQuery->rtable,
 						IsReferenceTableRTE))
@@ -2433,7 +2432,7 @@ CreateLocalDummyPlacement()
 {
 	ShardPlacement *dummyPlacement = CitusMakeNode(ShardPlacement);
 	dummyPlacement->nodeId = LOCAL_NODE_ID;
-	dummyPlacement->nodeName = LOCAL_HOST_NAME;
+	dummyPlacement->nodeName = LocalHostName;
 	dummyPlacement->nodePort = PostPortNumber;
 	dummyPlacement->groupId = GetLocalGroupId();
 	return dummyPlacement;
@ -61,6 +61,8 @@ typedef struct AttributeEquivalenceClass
|
||||||
{
|
{
|
||||||
uint32 equivalenceId;
|
uint32 equivalenceId;
|
||||||
List *equivalentAttributes;
|
List *equivalentAttributes;
|
||||||
|
|
||||||
|
Index unionQueryPartitionKeyIndex;
|
||||||
} AttributeEquivalenceClass;
|
} AttributeEquivalenceClass;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -83,7 +85,8 @@ typedef struct AttributeEquivalenceClassMember
|
||||||
|
|
||||||
|
|
||||||
static bool ContextContainsLocalRelation(RelationRestrictionContext *restrictionContext);
|
static bool ContextContainsLocalRelation(RelationRestrictionContext *restrictionContext);
|
||||||
static Var * FindUnionAllVar(PlannerInfo *root, List *appendRelList, Oid relationOid,
|
static int RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo);
|
||||||
|
static Var * FindUnionAllVar(PlannerInfo *root, List *translatedVars, Oid relationOid,
|
||||||
Index relationRteIndex, Index *partitionKeyIndex);
|
Index relationRteIndex, Index *partitionKeyIndex);
|
||||||
static bool ContainsMultipleDistributedRelations(PlannerRestrictionContext *
|
static bool ContainsMultipleDistributedRelations(PlannerRestrictionContext *
|
||||||
plannerRestrictionContext);
|
plannerRestrictionContext);
|
||||||
|
@ -91,11 +94,11 @@ static List * GenerateAttributeEquivalencesForRelationRestrictions(
|
||||||
RelationRestrictionContext *restrictionContext);
|
RelationRestrictionContext *restrictionContext);
|
||||||
static AttributeEquivalenceClass * AttributeEquivalenceClassForEquivalenceClass(
|
static AttributeEquivalenceClass * AttributeEquivalenceClassForEquivalenceClass(
|
||||||
EquivalenceClass *plannerEqClass, RelationRestriction *relationRestriction);
|
EquivalenceClass *plannerEqClass, RelationRestriction *relationRestriction);
|
||||||
static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||||
attributeEquivalenceClass,
|
attributeEquivalenceClass,
|
||||||
PlannerInfo *root, Var *varToBeAdded);
|
PlannerInfo *root, Var *varToBeAdded);
|
||||||
static void AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
static void AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||||
*attributeEquivalenceClass,
|
attributeEquivalenceClass,
|
||||||
RangeTblEntry *
|
RangeTblEntry *
|
||||||
rangeTableEntry,
|
rangeTableEntry,
|
||||||
PlannerInfo *root,
|
PlannerInfo *root,
|
||||||
|
@ -103,17 +106,17 @@ static void AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass
|
||||||
static Query * GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry,
|
static Query * GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry,
|
||||||
Var *varToBeAdded);
|
Var *varToBeAdded);
|
||||||
static void AddUnionAllSetOperationsToAttributeEquivalenceClass(
|
static void AddUnionAllSetOperationsToAttributeEquivalenceClass(
|
||||||
AttributeEquivalenceClass **
|
AttributeEquivalenceClass *
|
||||||
attributeEquivalenceClass,
|
attributeEquivalenceClass,
|
||||||
PlannerInfo *root,
|
PlannerInfo *root,
|
||||||
Var *varToBeAdded);
|
Var *varToBeAdded);
|
||||||
static void AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
static void AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||||
attributeEquivalenceClass,
|
attributeEquivalenceClass,
|
||||||
PlannerInfo *root,
|
PlannerInfo *root,
|
||||||
SetOperationStmt *
|
SetOperationStmt *
|
||||||
setOperation,
|
setOperation,
|
||||||
Var *varToBeAdded);
|
Var *varToBeAdded);
|
||||||
static void AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
static void AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||||
attrEquivalenceClass,
|
attrEquivalenceClass,
|
||||||
RangeTblEntry *rangeTableEntry,
|
RangeTblEntry *rangeTableEntry,
|
||||||
Var *varToBeAdded);
|
Var *varToBeAdded);
|
||||||
|
@ -141,7 +144,7 @@ static AttributeEquivalenceClass * GenerateEquivalenceClassForRelationRestrictio
|
||||||
RelationRestrictionContext
|
RelationRestrictionContext
|
||||||
*
|
*
|
||||||
relationRestrictionContext);
|
relationRestrictionContext);
|
||||||
static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass **
|
static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass *
|
||||||
firstClass,
|
firstClass,
|
||||||
AttributeEquivalenceClass *
|
AttributeEquivalenceClass *
|
||||||
secondClass);
|
secondClass);
|
||||||
|
@ -156,9 +159,13 @@ static JoinRestrictionContext * FilterJoinRestrictionContext(
|
||||||
static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int
|
static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int
|
||||||
rangeTableArrayLength, Relids
|
rangeTableArrayLength, Relids
|
||||||
queryRteIdentities);
|
queryRteIdentities);
|
||||||
static int RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo);
|
|
||||||
static Relids QueryRteIdentities(Query *queryTree);
|
static Relids QueryRteIdentities(Query *queryTree);
|
||||||
|
|
||||||
|
#if PG_VERSION_NUM >= PG_VERSION_13
|
||||||
|
static int ParentCountPriorToAppendRel(List *appendRelList, AppendRelInfo *appendRelInfo);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* AllDistributionKeysInQueryAreEqual returns true if either
|
* AllDistributionKeysInQueryAreEqual returns true if either
|
||||||
* (i) there exists join in the query and all relations joined on their
|
* (i) there exists join in the query and all relations joined on their
|
||||||
|
@ -249,7 +256,7 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
|
||||||
plannerRestrictionContext->relationRestrictionContext;
|
plannerRestrictionContext->relationRestrictionContext;
|
||||||
JoinRestrictionContext *joinRestrictionContext =
|
JoinRestrictionContext *joinRestrictionContext =
|
||||||
plannerRestrictionContext->joinRestrictionContext;
|
plannerRestrictionContext->joinRestrictionContext;
|
||||||
Index unionQueryPartitionKeyIndex = 0;
|
|
||||||
AttributeEquivalenceClass *attributeEquivalence =
|
AttributeEquivalenceClass *attributeEquivalence =
|
||||||
palloc0(sizeof(AttributeEquivalenceClass));
|
palloc0(sizeof(AttributeEquivalenceClass));
|
||||||
ListCell *relationRestrictionCell = NULL;
|
ListCell *relationRestrictionCell = NULL;
|
||||||
|
@ -279,7 +286,8 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
|
||||||
*/
|
*/
|
||||||
if (appendRelList != NULL)
|
if (appendRelList != NULL)
|
||||||
{
|
{
|
||||||
varToBeAdded = FindUnionAllVar(relationPlannerRoot, appendRelList,
|
varToBeAdded = FindUnionAllVar(relationPlannerRoot,
|
||||||
|
relationRestriction->translatedVars,
|
||||||
relationRestriction->relationId,
|
relationRestriction->relationId,
|
||||||
relationRestriction->index,
|
relationRestriction->index,
|
||||||
&partitionKeyIndex);
|
&partitionKeyIndex);
|
||||||
|
@ -323,17 +331,17 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
|
||||||
* we check whether all the relations have partition keys in the
|
* we check whether all the relations have partition keys in the
|
||||||
* same position.
|
* same position.
|
||||||
*/
|
*/
|
||||||
if (unionQueryPartitionKeyIndex == InvalidAttrNumber)
|
if (attributeEquivalence->unionQueryPartitionKeyIndex == InvalidAttrNumber)
|
||||||
{
|
{
|
||||||
unionQueryPartitionKeyIndex = partitionKeyIndex;
|
attributeEquivalence->unionQueryPartitionKeyIndex = partitionKeyIndex;
|
||||||
}
|
}
|
||||||
else if (unionQueryPartitionKeyIndex != partitionKeyIndex)
|
else if (attributeEquivalence->unionQueryPartitionKeyIndex != partitionKeyIndex)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert(varToBeAdded != NULL);
|
Assert(varToBeAdded != NULL);
|
||||||
AddToAttributeEquivalenceClass(&attributeEquivalence, relationPlannerRoot,
|
AddToAttributeEquivalenceClass(attributeEquivalence, relationPlannerRoot,
|
||||||
varToBeAdded);
|
varToBeAdded);
|
||||||
}
|
}
|
@@ -373,66 +381,74 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
 }


+/*
+ * RangeTableOffsetCompat returns the range table offset(in glob->finalrtable) for the appendRelInfo.
+ * For PG < 13 this is a no op.
+ */
+static int
+RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo)
+{
+#if PG_VERSION_NUM >= PG_VERSION_13
+    int parentCount = ParentCountPriorToAppendRel(root->append_rel_list, appendRelInfo);
+    int skipParentCount = parentCount - 1;
+
+    int i = 1;
+    for (; i < root->simple_rel_array_size; i++)
+    {
+        RangeTblEntry *rte = root->simple_rte_array[i];
+        if (rte->inh)
+        {
+            /*
+             * We skip the previous parents because we want to find the offset
+             * for the given append rel info.
+             */
+            if (skipParentCount > 0)
+            {
+                skipParentCount--;
+                continue;
+            }
+            break;
+        }
+    }
+    int indexInRtable = (i - 1);
+
+    /*
+     * Postgres adds the global rte array size to parent_relid as an offset.
+     * Here we do the reverse operation: Commit on postgres side:
+     * 6ef77cf46e81f45716ec981cb08781d426181378
+     */
+    int parentRelIndex = appendRelInfo->parent_relid - 1;
+    return parentRelIndex - indexInRtable;
+#else
+    return 0;
+#endif
+}
+
+
 /*
  * FindUnionAllVar finds the variable used in union all for the side that has
  * relationRteIndex as its index and the same varattno as the partition key of
  * the given relation with relationOid.
  */
 static Var *
-FindUnionAllVar(PlannerInfo *root, List *appendRelList, Oid relationOid,
+FindUnionAllVar(PlannerInfo *root, List *translatedVars, Oid relationOid,
                 Index relationRteIndex, Index *partitionKeyIndex)
 {
-    ListCell *appendRelCell = NULL;
-    AppendRelInfo *targetAppendRelInfo = NULL;
-    AttrNumber childAttrNumber = 0;
-
-    *partitionKeyIndex = 0;
-
-    /* iterate on the queries that are part of UNION ALL subselects */
-    foreach(appendRelCell, appendRelList)
-    {
-        AppendRelInfo *appendRelInfo = (AppendRelInfo *) lfirst(appendRelCell);
-
-        int rtoffset = RangeTableOffsetCompat(root, appendRelInfo);
-
-        /*
-         * We're only interested in the child rel that is equal to the
-         * relation we're investigating.
-         */
-        if (appendRelInfo->child_relid - rtoffset == relationRteIndex)
-        {
-            targetAppendRelInfo = appendRelInfo;
-            break;
-        }
-    }
-
-    if (!targetAppendRelInfo)
+    if (!IsCitusTableType(relationOid, STRICTLY_PARTITIONED_DISTRIBUTED_TABLE))
     {
+        /* we only care about hash and range partitioned tables */
+        *partitionKeyIndex = 0;
         return NULL;
     }

     Var *relationPartitionKey = DistPartitionKeyOrError(relationOid);

-#if PG_VERSION_NUM >= PG_VERSION_13
-    for (; childAttrNumber < targetAppendRelInfo->num_child_cols; childAttrNumber++)
-    {
-        int curAttNo = targetAppendRelInfo->parent_colnos[childAttrNumber];
-        if (curAttNo == relationPartitionKey->varattno)
-        {
-            *partitionKeyIndex = (childAttrNumber + 1);
-            int rtoffset = RangeTableOffsetCompat(root, targetAppendRelInfo);
-            relationPartitionKey->varno = targetAppendRelInfo->child_relid - rtoffset;
-            return relationPartitionKey;
-        }
-    }
-#else
+    AttrNumber childAttrNumber = 0;
+    *partitionKeyIndex = 0;
+
     ListCell *translatedVarCell;
-    List *translaterVars = targetAppendRelInfo->translated_vars;
-    foreach(translatedVarCell, translaterVars)
+    foreach(translatedVarCell, translatedVars)
     {
         Node *targetNode = (Node *) lfirst(translatedVarCell);

         childAttrNumber++;

         if (!IsA(targetNode, Var))
@@ -449,7 +465,6 @@ FindUnionAllVar(PlannerInfo *root, List *appendRelList, Oid relationOid,
             return targetVar;
         }
     }
-#endif

     return NULL;
 }

@@ -580,7 +595,6 @@ GenerateAllAttributeEquivalences(PlannerRestrictionContext *plannerRestrictionCo
     JoinRestrictionContext *joinRestrictionContext =
         plannerRestrictionContext->joinRestrictionContext;

-
     /* reset the equivalence id counter per call to prevent overflows */
     attributeEquivalenceId = 1;

@@ -788,14 +802,14 @@ AttributeEquivalenceClassForEquivalenceClass(EquivalenceClass *plannerEqClass,
                                              equivalenceParam, &outerNodeRoot);
         if (expressionVar)
         {
-            AddToAttributeEquivalenceClass(&attributeEquivalence, outerNodeRoot,
+            AddToAttributeEquivalenceClass(attributeEquivalence, outerNodeRoot,
                                            expressionVar);
         }
     }
     else if (IsA(strippedEquivalenceExpr, Var))
     {
         expressionVar = (Var *) strippedEquivalenceExpr;
-        AddToAttributeEquivalenceClass(&attributeEquivalence, plannerInfo,
+        AddToAttributeEquivalenceClass(attributeEquivalence, plannerInfo,
                                        expressionVar);
     }
 }
@@ -978,7 +992,7 @@ GenerateCommonEquivalence(List *attributeEquivalenceList,
         if (AttributeClassContainsAttributeClassMember(attributeEquialanceMember,
                                                        commonEquivalenceClass))
         {
-            ListConcatUniqueAttributeClassMemberLists(&commonEquivalenceClass,
+            ListConcatUniqueAttributeClassMemberLists(commonEquivalenceClass,
                                                       currentEquivalenceClass);

             addedEquivalenceIds = bms_add_member(addedEquivalenceIds,
@@ -1058,7 +1072,7 @@ GenerateEquivalenceClassForRelationRestriction(
  * firstClass.
  */
 static void
-ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass **firstClass,
+ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass *firstClass,
                                           AttributeEquivalenceClass *secondClass)
 {
     ListCell *equivalenceClassMemberCell = NULL;
@@ -1069,13 +1083,13 @@ ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass **firstClass
         AttributeEquivalenceClassMember *newEqMember =
             (AttributeEquivalenceClassMember *) lfirst(equivalenceClassMemberCell);

-        if (AttributeClassContainsAttributeClassMember(newEqMember, *firstClass))
+        if (AttributeClassContainsAttributeClassMember(newEqMember, firstClass))
         {
             continue;
         }

-        (*firstClass)->equivalentAttributes = lappend((*firstClass)->equivalentAttributes,
+        firstClass->equivalentAttributes = lappend(firstClass->equivalentAttributes,
                                                       newEqMember);
     }
 }

@@ -1150,10 +1164,10 @@ GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext *
                                                sizeof(AttributeEquivalenceClass));
         attributeEquivalence->equivalenceId = attributeEquivalenceId++;

-        AddToAttributeEquivalenceClass(&attributeEquivalence,
+        AddToAttributeEquivalenceClass(attributeEquivalence,
                                        joinRestriction->plannerInfo, leftVar);

-        AddToAttributeEquivalenceClass(&attributeEquivalence,
+        AddToAttributeEquivalenceClass(attributeEquivalence,
                                        joinRestriction->plannerInfo, rightVar);

         attributeEquivalenceList =
@@ -1194,7 +1208,7 @@ GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext *
  * equivalence class
  */
 static void
-AddToAttributeEquivalenceClass(AttributeEquivalenceClass **attributeEquivalenceClass,
+AddToAttributeEquivalenceClass(AttributeEquivalenceClass *attributeEquivalenceClass,
                                PlannerInfo *root, Var *varToBeAdded)
 {
     /* punt if it's a whole-row var rather than a plain column reference */
@@ -1233,9 +1247,10 @@ AddToAttributeEquivalenceClass(AttributeEquivalenceClass **attributeEquivalenceC
  */
 static void
 AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass
-                                          **attributeEquivalenceClass,
+                                          *attributeEquivalenceClass,
                                           RangeTblEntry *rangeTableEntry,
-                                          PlannerInfo *root, Var *varToBeAdded)
+                                          PlannerInfo *root,
+                                          Var *varToBeAdded)
 {
     RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno);
     Query *targetSubquery = GetTargetSubquery(root, rangeTableEntry, varToBeAdded);
|
||||||
* var the given equivalence class.
|
* var the given equivalence class.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
AddUnionAllSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
AddUnionAllSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass *
|
||||||
attributeEquivalenceClass,
|
attributeEquivalenceClass,
|
||||||
PlannerInfo *root,
|
PlannerInfo *root,
|
||||||
Var *varToBeAdded)
|
Var *varToBeAdded)
|
||||||
|
@@ -1377,41 +1392,101 @@ AddUnionAllSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
             continue;
         }

         int rtoffset = RangeTableOffsetCompat(root, appendRelInfo);
+        int childRelId = appendRelInfo->child_relid - rtoffset;

-        /* set the varno accordingly for this specific child */
-        varToBeAdded->varno = appendRelInfo->child_relid - rtoffset;
+        if (root->simple_rel_array_size <= childRelId)
+        {
+            /* we prefer to return over an Assert or error to be defensive */
+            return;
+        }

-        AddToAttributeEquivalenceClass(attributeEquivalenceClass, root,
-                                       varToBeAdded);
-    }
-}
-
-
-/*
- * RangeTableOffsetCompat returns the range table offset(in glob->finalrtable) for the appendRelInfo.
- * For PG < 13 this is a no op.
- */
-static int
-RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo)
-{
-#if PG_VERSION_NUM >= PG_VERSION_13
-    int i = 1;
-    for (; i < root->simple_rel_array_size; i++)
-    {
-        RangeTblEntry *rte = root->simple_rte_array[i];
+        RangeTblEntry *rte = root->simple_rte_array[childRelId];
         if (rte->inh)
         {
-            break;
+            /*
+             * This code-path may require improvements. If a leaf of a UNION ALL
+             * (e.g., an entry in appendRelList) itself is another UNION ALL
+             * (e.g., rte->inh = true), the logic here might get into an infinite
+             * recursion.
+             *
+             * The downside of "continue" here is that certain UNION ALL queries
+             * that are safe to pushdown may not be pushed down.
+             */
+            continue;
+        }
+        else if (rte->rtekind == RTE_RELATION)
+        {
+            Index partitionKeyIndex = 0;
+            List *translatedVars = TranslatedVarsForRteIdentity(GetRTEIdentity(rte));
+            Var *varToBeAddedOnUnionAllSubquery =
+                FindUnionAllVar(root, translatedVars, rte->relid, childRelId,
+                                &partitionKeyIndex);
+            if (partitionKeyIndex == 0)
+            {
+                /* no partition key on the target list */
+                continue;
+            }
+
+            if (attributeEquivalenceClass->unionQueryPartitionKeyIndex == 0)
+            {
+                /* the first partition key index we found */
+                attributeEquivalenceClass->unionQueryPartitionKeyIndex =
+                    partitionKeyIndex;
+            }
+            else if (attributeEquivalenceClass->unionQueryPartitionKeyIndex !=
+                     partitionKeyIndex)
+            {
+                /*
+                 * Partition keys on the leaves of the UNION ALL queries on
+                 * different ordinal positions. We cannot pushdown, so skip.
+                 */
+                continue;
+            }
+
+            if (varToBeAddedOnUnionAllSubquery != NULL)
+            {
+                AddToAttributeEquivalenceClass(attributeEquivalenceClass, root,
+                                               varToBeAddedOnUnionAllSubquery);
+            }
+        }
+        else
+        {
+            /* set the varno accordingly for this specific child */
+            varToBeAdded->varno = childRelId;
+
+            AddToAttributeEquivalenceClass(attributeEquivalenceClass, root,
+                                           varToBeAdded);
         }
     }
-    int indexInRtable = (i - 1);
-    return appendRelInfo->parent_relid - 1 - (indexInRtable);
-#else
-    return 0;
-#endif
 }


+#if PG_VERSION_NUM >= PG_VERSION_13
+
+/*
+ * ParentCountPriorToAppendRel returns the number of parents that come before
+ * the given append rel info.
+ */
+static int
+ParentCountPriorToAppendRel(List *appendRelList, AppendRelInfo *targetAppendRelInfo)
+{
+    int targetParentIndex = targetAppendRelInfo->parent_relid;
+    Bitmapset *parent_ids = NULL;
+    AppendRelInfo *appendRelInfo = NULL;
+    foreach_ptr(appendRelInfo, appendRelList)
+    {
+        int curParentIndex = appendRelInfo->parent_relid;
+        if (curParentIndex <= targetParentIndex)
+        {
+            parent_ids = bms_add_member(parent_ids, curParentIndex);
+        }
+    }
+    return bms_num_members(parent_ids);
+}
+
+#endif
+
+
 /*
  * AddUnionSetOperationsToAttributeEquivalenceClass recursively iterates on all the
  * setOperations and adds each corresponding target entry to the given equivalence
@@ -1422,7 +1497,7 @@ RangeTableOffsetCompat(PlannerInfo *root, AppendRelInfo *appendRelInfo)
  * messages.
  */
 static void
-AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
+AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass *
                                                  attributeEquivalenceClass,
                                                  PlannerInfo *root,
                                                  SetOperationStmt *setOperation,
@@ -1450,7 +1525,7 @@ AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass **
  * the input rte to be an RTE_RELATION.
  */
 static void
-AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
+AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass *
                                           attrEquivalenceClass,
                                           RangeTblEntry *rangeTableEntry,
                                           Var *varToBeAdded)
@@ -1487,8 +1562,8 @@ AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
     attributeEqMember->rteIdentity = GetRTEIdentity(rangeTableEntry);
     attributeEqMember->relationId = rangeTableEntry->relid;

-    (*attrEquivalenceClass)->equivalentAttributes =
-        lappend((*attrEquivalenceClass)->equivalentAttributes,
+    attrEquivalenceClass->equivalentAttributes =
+        lappend(attrEquivalenceClass->equivalentAttributes,
                 attributeEqMember);
 }

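The net effect of the planner changes above is that a UNION ALL is only treated as co-located, and therefore pushed down, when every leaf exposes the distribution column at the same target-list position. A hedged SQL sketch with made-up table names (not taken from this patch):

-- Both leaves project tenant_id in the same position, so
-- unionQueryPartitionKeyIndex matches across the leaves and the union can
-- be planned as a single pushed-down query.
CREATE TABLE events_2020 (tenant_id int, payload text);
CREATE TABLE events_2021 (tenant_id int, payload text);
SELECT create_distributed_table('events_2020', 'tenant_id');
SELECT create_distributed_table('events_2021', 'tenant_id');

SELECT tenant_id, count(*)
FROM (
    SELECT tenant_id, payload FROM events_2020
    UNION ALL
    SELECT tenant_id, payload FROM events_2021
) AS all_events
GROUP BY tenant_id;

-- If one side instead projected "payload, tenant_id", the partition key
-- would sit at a different ordinal position on that leaf and the new code
-- skips the pushdown rather than producing wrong results.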
@@ -1575,6 +1575,22 @@ LowerShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
     /* setup partitionColumnValue argument once */
     fcSetArg(compareFunction, 0, partitionColumnValue);

+    /*
+     * Now we test partitionColumnValue used in where clause such as
+     * partCol > partitionColumnValue (or partCol >= partitionColumnValue)
+     * against four possibilities, these are:
+     * 1) partitionColumnValue falls into a specific shard, such that:
+     *    partitionColumnValue >= shard[x].min, and
+     *    partitionColumnValue < shard[x].max (or partitionColumnValue <= shard[x].max).
+     * 2) partitionColumnValue < shard[x].min for all the shards
+     * 3) partitionColumnValue > shard[x].max for all the shards
+     * 4) partitionColumnValue falls in between two shards, such that:
+     *    partitionColumnValue > shard[x].max and
+     *    partitionColumnValue < shard[x+1].min
+     *
+     * For 1), we find that shard in below loop using binary search and
+     * return the index of it. For the others, see the end of this function.
+     */
     while (lowerBoundIndex < upperBoundIndex)
     {
         int middleIndex = lowerBoundIndex + ((upperBoundIndex - lowerBoundIndex) / 2);
@@ -1607,7 +1623,7 @@ LowerShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
             continue;
         }

-        /* found interval containing partitionValue */
+        /* partitionColumnValue falls into a specific shard, possibility 1) */
         return middleIndex;
     }

@@ -1618,20 +1634,30 @@ LowerShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
      * (we'd have hit the return middleIndex; case otherwise). Figure out
      * whether there's possibly any interval containing a value that's bigger
      * than the partition key one.
+     *
+     * Also note that we initialized lowerBoundIndex with 0. Similarly,
+     * we always set it to the index of the shard that we consider as our
+     * lower boundary during binary search.
      */
-    if (lowerBoundIndex == 0)
+    if (lowerBoundIndex == shardCount)
     {
-        /* all intervals are bigger, thus return 0 */
-        return 0;
-    }
-    else if (lowerBoundIndex == shardCount)
-    {
-        /* partition value is bigger than all partition values */
+        /*
+         * Since lowerBoundIndex is an inclusive index, being equal to shardCount
+         * means all the shards have smaller values than partitionColumnValue,
+         * which corresponds to possibility 3).
+         * In that case, since we can't have a lower bound shard, we return
+         * INVALID_SHARD_INDEX here.
+         */
         return INVALID_SHARD_INDEX;
     }

-    /* value falls inbetween intervals */
-    return lowerBoundIndex + 1;
+    /*
+     * partitionColumnValue is either smaller than all the shards or falls in
+     * between two shards, which corresponds to possibility 2) or 4).
+     * Knowing that lowerBoundIndex is an inclusive index, we directly return
+     * it as the index for the lower bound shard here.
+     */
+    return lowerBoundIndex;
 }

@@ -1651,6 +1677,23 @@ UpperShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
     /* setup partitionColumnValue argument once */
     fcSetArg(compareFunction, 0, partitionColumnValue);

+    /*
+     * Now we test partitionColumnValue used in where clause such as
+     * partCol < partitionColumnValue (or partCol <= partitionColumnValue)
+     * against four possibilities, these are:
+     * 1) partitionColumnValue falls into a specific shard, such that:
+     *    partitionColumnValue <= shard[x].max, and
+     *    partitionColumnValue > shard[x].min (or partitionColumnValue >= shard[x].min).
+     * 2) partitionColumnValue > shard[x].max for all the shards
+     * 3) partitionColumnValue < shard[x].min for all the shards
+     * 4) partitionColumnValue falls in between two shards, such that:
+     *    partitionColumnValue > shard[x].max and
+     *    partitionColumnValue < shard[x+1].min
+     *
+     * For 1), we find that shard in below loop using binary search and
+     * return the index of it. For the others, see the end of this function.
+     */
+
     while (lowerBoundIndex < upperBoundIndex)
     {
         int middleIndex = lowerBoundIndex + ((upperBoundIndex - lowerBoundIndex) / 2);
@@ -1683,7 +1726,7 @@ UpperShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
             continue;
         }

-        /* found interval containing partitionValue */
+        /* partitionColumnValue falls into a specific shard, possibility 1) */
         return middleIndex;
     }

@@ -1694,19 +1737,29 @@ UpperShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCach
      * (we'd have hit the return middleIndex; case otherwise). Figure out
      * whether there's possibly any interval containing a value that's smaller
      * than the partition key one.
+     *
+     * Also note that we initialized upperBoundIndex with shardCount. Similarly,
+     * we always set it to the index of the next shard that we consider as our
+     * upper boundary during binary search.
      */
-    if (upperBoundIndex == shardCount)
+    if (upperBoundIndex == 0)
     {
-        /* all intervals are smaller, thus return 0 */
-        return shardCount - 1;
-    }
-    else if (upperBoundIndex == 0)
-    {
-        /* partition value is smaller than all partition values */
+        /*
+         * Since upperBoundIndex is an exclusive index, being equal to 0 means
+         * all the shards have greater values than partitionColumnValue, which
+         * corresponds to possibility 3).
+         * In that case, since we can't have an upper bound shard, we return
+         * INVALID_SHARD_INDEX here.
+         */
         return INVALID_SHARD_INDEX;
     }

-    /* value falls inbetween intervals, return the inverval one smaller as bound */
+    /*
+     * partitionColumnValue is either greater than all the shards or falls in
+     * between two shards, which corresponds to possibility 2) or 4).
+     * Knowing that upperBoundIndex is an exclusive index, we return the index
+     * for the previous shard here.
+     */
     return upperBoundIndex - 1;
 }

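Both boundary functions implement the same idea spelled out in the new comments: a binary search over sorted, non-overlapping shard ranges that either lands inside a shard or reports that no bounding shard exists. A self-contained toy model in C of the lower-bound case; this is an illustration with invented shard ranges, not the Citus implementation:

#include <stdio.h>

/*
 * Shards are sorted, non-overlapping [min, max] ranges; find the first shard
 * that could satisfy "partCol > value". Returns -1 when every shard lies
 * below the value (possibility 3 in the comments above).
 */
static int LowerBoundShard(const int mins[], const int maxs[], int shardCount, int value)
{
    int lower = 0;           /* inclusive candidate for the lower bound */
    int upper = shardCount;  /* exclusive */
    while (lower < upper)
    {
        int middle = lower + (upper - lower) / 2;
        if (value < mins[middle])
        {
            upper = middle;          /* value sorts before this shard */
        }
        else if (value > maxs[middle])
        {
            lower = middle + 1;      /* value sorts after this shard */
        }
        else
        {
            return middle;           /* possibility 1: value inside this shard */
        }
    }
    return lower == shardCount ? -1 : lower;   /* possibilities 2, 3 or 4 */
}

int main(void)
{
    int mins[] = { 0, 10, 20 };
    int maxs[] = { 9, 19, 29 };
    printf("%d %d %d\n",
           LowerBoundShard(mins, maxs, 3, 15),   /* 1: inside shard 1 */
           LowerBoundShard(mins, maxs, 3, -5),   /* 0: every shard is higher */
           LowerBoundShard(mins, maxs, 3, 99));  /* -1: every shard is lower */
    return 0;
}

UpperShardBoundary is the mirror image: its exclusive upper index explains the "upperBoundIndex - 1" return in the patched code.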
@@ -27,18 +27,16 @@ static ProgressMonitorData * MonitorDataFromDSMHandle(dsm_handle dsmHandle,


 /*
- * CreateProgressMonitor is used to create a place to store progress information related
- * to long running processes. The function creates a dynamic shared memory segment
- * consisting of a header regarding to the process and an array of "steps" that the long
- * running "operations" consists of. The handle of the dynamic shared memory is stored in
- * pg_stat_get_progress_info output, to be parsed by a progress retrieval command
- * later on. This behavior may cause unrelated (but hopefully harmless) rows in
- * pg_stat_progress_vacuum output. The caller of this function should provide a magic
- * number, a unique 64 bit unsigned integer, to distinguish different types of commands.
+ * CreateProgressMonitor is used to create a place to store progress
+ * information related to long running processes. The function creates a
+ * dynamic shared memory segment consisting of a header regarding to the
+ * process and an array of "steps" that the long running "operations" consists
+ * of. After initializing the data in the array of steps, the shared memory
+ * segment can be shared with other processes using RegisterProgressMonitor, by
+ * giving it the value that's written to the dsmHandle argument.
  */
 ProgressMonitorData *
-CreateProgressMonitor(uint64 progressTypeMagicNumber, int stepCount, Size stepSize,
-                      Oid relationId)
+CreateProgressMonitor(int stepCount, Size stepSize, dsm_handle *dsmHandle)
 {
     if (stepSize <= 0 || stepCount <= 0)
     {
@@ -58,20 +56,37 @@ CreateProgressMonitor(uint64 progressTypeMagicNumber, int stepCount, Size stepSi
         return NULL;
     }

-    dsm_handle dsmHandle = dsm_segment_handle(dsmSegment);
+    *dsmHandle = dsm_segment_handle(dsmSegment);

-    ProgressMonitorData *monitor = MonitorDataFromDSMHandle(dsmHandle, &dsmSegment);
+    ProgressMonitorData *monitor = MonitorDataFromDSMHandle(*dsmHandle, &dsmSegment);

     monitor->stepCount = stepCount;
     monitor->processId = MyProcPid;
+    return monitor;
+}
+
+
+/*
+ * RegisterProgressMonitor shares dsmHandle with other postgres process by
+ * storing it in pg_stat_get_progress_info output, to be parsed by a
+ * progress retrieval command later on. This behavior may cause unrelated (but
+ * hopefully harmless) rows in pg_stat_progress_vacuum output. The caller of
+ * this function should provide a magic number, a unique 64 bit unsigned
+ * integer, to distinguish different types of commands.
+ *
+ * IMPORTANT: After registering the progress monitor, all modification to the
+ * data should be done using concurrency safe operations (i.e. locks and
+ * atomics)
+ */
+void
+RegisterProgressMonitor(uint64 progressTypeMagicNumber, Oid relationId,
+                        dsm_handle dsmHandle)
+{
     pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM, relationId);
     pgstat_progress_update_param(1, dsmHandle);
     pgstat_progress_update_param(0, progressTypeMagicNumber);

     currentProgressDSMHandle = dsmHandle;

-    return monitor;
 }

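Splitting creation and registration means a caller can fill in the step array while the segment is still private and only publish the handle afterwards, which is exactly what the IMPORTANT note above asks for. A rough usage sketch; the magic number, step struct, header path, and function name are invented, and ProgressMonitorSteps is the helper added further down in this same diff:

#include "postgres.h"
/* #include "distributed/multi_progress.h"   -- assumed header for these declarations */

/* purely illustrative step payload */
typedef struct PlaceholderStep { int shardId; int progress; } PlaceholderStep;

static void
StartHypotheticalOperation(Oid relationId, int stepCount)
{
    dsm_handle dsmHandle = 0;
    ProgressMonitorData *monitor =
        CreateProgressMonitor(stepCount, sizeof(PlaceholderStep), &dsmHandle);
    if (monitor == NULL)
    {
        return;
    }

    /* initialize the steps while no other backend can see the segment yet */
    PlaceholderStep *steps = (PlaceholderStep *) ProgressMonitorSteps(monitor);
    for (int stepIndex = 0; stepIndex < stepCount; stepIndex++)
    {
        steps[stepIndex].shardId = 0;
        steps[stepIndex].progress = 0;
    }

    /* only now make the segment discoverable by monitoring backends */
    RegisterProgressMonitor(0x1122334455667788ULL, relationId, dsmHandle);
}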
@@ -204,24 +219,46 @@ ProgressMonitorData *
 MonitorDataFromDSMHandle(dsm_handle dsmHandle, dsm_segment **attachedSegment)
 {
     dsm_segment *dsmSegment = dsm_find_mapping(dsmHandle);
-    ProgressMonitorData *monitor = NULL;

     if (dsmSegment == NULL)
     {
         dsmSegment = dsm_attach(dsmHandle);
     }

-    if (dsmSegment != NULL)
+    if (dsmSegment == NULL)
     {
-        monitor = (ProgressMonitorData *) dsm_segment_address(dsmSegment);
-        monitor->steps = (void *) (monitor + 1);
-        *attachedSegment = dsmSegment;
+        return NULL;
     }

+    ProgressMonitorData *monitor = (ProgressMonitorData *) dsm_segment_address(
+        dsmSegment);
+
+    *attachedSegment = dsmSegment;
+
     return monitor;
 }


+/*
+ * ProgressMonitorSteps returns a pointer to the array of steps that are stored
+ * in a progress monitor. This is simply the data right after the header, so
+ * this function is trivial. The main purpose of this function is to make the
+ * intent clear to readers of the code.
+ *
+ * NOTE: The pointer this function returns is explicitly not stored in the
+ * header, because the header is shared between processes. The absolute pointer
+ * to the steps can have a different value between processes though, because
+ * the same piece of shared memory often has a different address in different
+ * processes. So we calculate this pointer over and over to make sure we use
+ * the right value for each process.
+ */
+void *
+ProgressMonitorSteps(ProgressMonitorData *monitor)
+{
+    return monitor + 1;
+}
+
+
 /*
  * DetachFromDSMSegments ensures that the process is detached from all of the segments in
  * the given list.
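A toy model of why the steps pointer is recomputed instead of being stored in the shared header: the same segment can be mapped at different addresses in different backends, so only the header-relative offset is portable. Illustrative standalone C, not Citus code:

#include <stdio.h>

typedef struct Header { int stepCount; } Header;

/* steps live right after the header, so "header + 1" works in every process */
static void *StepsFromHeader(Header *header)
{
    return header + 1;
}

int main(void)
{
    /* pretend these buffers are the mapped addresses in two different backends */
    char segmentInBackendA[128];
    char segmentInBackendB[128];
    Header *headerA = (Header *) segmentInBackendA;
    Header *headerB = (Header *) segmentInBackendB;

    /* the absolute pointers differ, but the relative rule holds in both */
    printf("%p vs %p\n", StepsFromHeader(headerA), StepsFromHeader(headerB));
    return 0;
}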
@@ -556,30 +556,6 @@ RelayEventExtendNames(Node *parseTree, char *schemaName, uint64 shardId)

             AppendShardIdToName(oldRelationName, shardId);
             AppendShardIdToName(newRelationName, shardId);
-
-            /*
-             * PostgreSQL creates array types for each ordinary table, with
-             * the same name plus a prefix of '_'.
-             *
-             * ALTER TABLE ... RENAME TO ... also renames the underlying
-             * array type, and the DDL is run in parallel connections over
-             * all the placements and shards at once. Concurrent access
-             * here deadlocks.
-             *
-             * Let's provide an easier to understand error message here
-             * than the deadlock one.
-             *
-             * See also https://github.com/citusdata/citus/issues/1664
-             */
-            int newRelationNameLength = strlen(*newRelationName);
-            if (newRelationNameLength >= (NAMEDATALEN - 1))
-            {
-                ereport(ERROR,
-                        (errcode(ERRCODE_NAME_TOO_LONG),
-                         errmsg(
-                             "shard name %s exceeds %d characters",
-                             *newRelationName, NAMEDATALEN - 1)));
-            }
         }
         else if (objectType == OBJECT_COLUMN)
         {
@@ -701,6 +701,19 @@ RegisterCitusConfigVariables(void)
         GUC_NO_SHOW_ALL,
         NoticeIfSubqueryPushdownEnabled, NULL, NULL);

+    DefineCustomIntVariable(
+        "citus.remote_copy_flush_threshold",
+        gettext_noop("Sets the threshold for remote copy to be flushed."),
+        gettext_noop("When sending data over remote connections via the COPY protocol, "
+                     "bytes are first buffered internally by libpq. If the number of "
+                     "bytes buffered exceeds the threshold, Citus waits for all the "
+                     "bytes to flush."),
+        &RemoteCopyFlushThreshold,
+        8 * 1024 * 1024, 0, INT_MAX,
+        PGC_USERSET,
+        GUC_UNIT_BYTE | GUC_NO_SHOW_ALL,
+        NULL, NULL, NULL);
+
     DefineCustomIntVariable(
         "citus.local_copy_flush_threshold",
         gettext_noop("Sets the threshold for local copy to be flushed."),
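For reference, the new setting is hidden from SHOW ALL by GUC_NO_SHOW_ALL but can still be set explicitly; a hedged example with an arbitrary value:

SET citus.remote_copy_flush_threshold TO '4MB';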
@@ -1238,6 +1251,16 @@ RegisterCitusConfigVariables(void)
         GUC_STANDARD,
         NULL, NULL, NULL);

+    DefineCustomIntVariable(
+        "citus.max_cached_connection_lifetime",
+        gettext_noop("Sets the maximum lifetime of cached connections to other nodes."),
+        NULL,
+        &MaxCachedConnectionLifetime,
+        10 * MS_PER_MINUTE, -1, INT_MAX,
+        PGC_USERSET,
+        GUC_UNIT_MS | GUC_STANDARD,
+        NULL, NULL, NULL);
+
     DefineCustomIntVariable(
         "citus.repartition_join_bucket_count_per_node",
         gettext_noop("Sets the bucket size for repartition joins per node"),
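A hedged example of using the new connection-lifetime setting; the default registered above is ten minutes, and -1 (the registered minimum) keeps cached connections indefinitely:

ALTER SYSTEM SET citus.max_cached_connection_lifetime TO '1min';
SELECT pg_reload_conf();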
@@ -1454,6 +1477,19 @@ RegisterCitusConfigVariables(void)
         GUC_STANDARD,
         NULL, NULL, NULL);

+    DefineCustomStringVariable(
+        "citus.local_hostname",
+        gettext_noop("Sets the hostname when connecting back to itself."),
+        gettext_noop("For some operations nodes, mostly the coordinator, connect back to "
+                     "itself. When configuring SSL certificates it sometimes is required "
+                     "to use a specific hostname to match the CN of the certificate when "
+                     "verify-full is used."),
+        &LocalHostName,
+        "localhost",
+        PGC_SUSET,
+        GUC_STANDARD,
+        NULL, NULL, NULL);
+
     DefineCustomBoolVariable(
         "citus.writable_standby_coordinator",
         gettext_noop("Enables simple DML via a streaming replica of the coordinator"),
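A hedged example of pointing connect-back connections at a hostname that matches the node's certificate CN when verify-full is in use; the hostname below is made up:

ALTER SYSTEM SET citus.local_hostname TO 'coordinator.example.internal';
SELECT pg_reload_conf();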
@@ -0,0 +1,5 @@
-- citus--10.0-1--10.0-2

#include "../../columnar/sql/columnar--10.0-1--10.0-2.sql"

GRANT SELECT ON public.citus_tables TO public;
@@ -0,0 +1,18 @@
-- citus--10.0-2--10.0-3

#include "udfs/citus_update_table_statistics/10.0-3.sql"

CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
    RETURNS VOID
    LANGUAGE C STRICT
    AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
COMMENT ON FUNCTION pg_catalog.master_update_table_statistics(regclass)
    IS 'updates shard statistics of the given table';

CREATE OR REPLACE FUNCTION pg_catalog.citus_get_active_worker_nodes(OUT node_name text, OUT node_port bigint)
    RETURNS SETOF record
    LANGUAGE C STRICT ROWS 100
    AS 'MODULE_PATHNAME', $$citus_get_active_worker_nodes$$;
COMMENT ON FUNCTION pg_catalog.citus_get_active_worker_nodes()
    IS 'fetch set of active worker nodes';
@@ -0,0 +1,20 @@
-- citus--10.0-3--10.0-4

-- This migration file aims to fix 2 issues with upgrades on clusters

-- 1. a bug in public schema dependency for citus_tables view.
--
-- Users who do not have public schema in their clusters were unable to upgrade
-- to Citus 10.x due to the citus_tables view that used to be created in public
-- schema

#include "udfs/citus_tables/10.0-4.sql"

-- 2. a bug in our PG upgrade functions
--
-- Users who took the 9.5-2--10.0-1 upgrade path already have the fix, but users
-- who took the 9.5-1--10.0-1 upgrade path do not. Hence, we repeat the CREATE OR
-- REPLACE from the 9.5-2 definition for citus_prepare_pg_upgrade.

#include "udfs/citus_prepare_pg_upgrade/9.5-2.sql"
#include "udfs/citus_finish_pg_upgrade/10.0-4.sql"
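Existing 10.0 clusters pick these migrations up through the usual extension update path; a hedged example:

ALTER EXTENSION citus UPDATE TO '10.0-4';
-- or simply move to the latest installed version:
ALTER EXTENSION citus UPDATE;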
@@ -0,0 +1,3 @@
-- 9.4-1--9.4-2 was added later as a patch to fix a bug in our PG upgrade functions
#include "udfs/citus_prepare_pg_upgrade/9.4-2.sql"
#include "udfs/citus_finish_pg_upgrade/9.4-2.sql"
@@ -0,0 +1,9 @@
--
-- 9.4-1--9.4-2 was added later as a patch to fix a bug in our PG upgrade functions
--
-- This script brings users who installed the patch released back to the 9.4-1
-- upgrade path. We do this via a semantical downgrade since there has already been
-- introduced new changes in the schema from 9.4-1 to 9.5-1. To make sure we include all
-- changes made during that version change we decide to use the existing upgrade path from
-- our later introduced 9.4-2 version.
--
@@ -0,0 +1,7 @@
-- 9.4-2--9.4-3 was added later as a patch to improve master_update_table_statistics
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
    RETURNS VOID
    LANGUAGE C STRICT
    AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
COMMENT ON FUNCTION pg_catalog.master_update_table_statistics(regclass)
    IS 'updates shard statistics of the given table';
@@ -0,0 +1,22 @@
-- citus--9.4-3--9.4-2
-- This is a downgrade path that will revert the changes made in citus--9.4-2--9.4-3.sql
-- 9.4-2--9.4-3 was added later as a patch to improve master_update_table_statistics.
-- We have this downgrade script so that we can continue from the main upgrade path
-- when upgrading to later versions.
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
    RETURNS VOID AS $$
DECLARE
    colocated_tables regclass[];
BEGIN
    SELECT get_colocated_table_array(relation) INTO colocated_tables;

    PERFORM
        master_update_shard_statistics(shardid)
    FROM
        pg_dist_shard
    WHERE
        logicalrelid = ANY (colocated_tables);
END;
$$ LANGUAGE 'plpgsql';
COMMENT ON FUNCTION master_update_table_statistics(regclass)
    IS 'updates shard statistics of the given table and its colocated tables';
|
||||||
-- citus--9.5-1--10.0-1
|
-- citus--9.5-1--10.0-4
|
||||||
|
|
||||||
|
-- This migration file aims to fix the issues with upgrades on clusters without public schema.
|
||||||
|
|
||||||
|
-- This file is created by the following command, and some more changes in a separate commit
|
||||||
|
-- cat citus--9.5-1--10.0-1.sql citus--10.0-1--10.0-2.sql citus--10.0-2--10.0-3.sql > citus--9.5-1--10.0-4.sql
|
||||||
|
|
||||||
|
-- copy of citus--9.5-1--10.0-1
|
||||||
|
|
||||||
DROP FUNCTION pg_catalog.upgrade_to_reference_table(regclass);
|
DROP FUNCTION pg_catalog.upgrade_to_reference_table(regclass);
|
||||||
DROP FUNCTION IF EXISTS pg_catalog.citus_total_relation_size(regclass);
|
DROP FUNCTION IF EXISTS pg_catalog.citus_total_relation_size(regclass);
|
||||||
|
|
||||||
#include "udfs/citus_total_relation_size/10.0-1.sql"
|
#include "udfs/citus_total_relation_size/10.0-1.sql"
|
||||||
#include "udfs/citus_tables/10.0-1.sql"
|
|
||||||
#include "udfs/citus_finish_pg_upgrade/10.0-1.sql"
|
#include "udfs/citus_finish_pg_upgrade/10.0-1.sql"
|
||||||
#include "udfs/alter_distributed_table/10.0-1.sql"
|
#include "udfs/alter_distributed_table/10.0-1.sql"
|
||||||
#include "udfs/alter_table_set_access_method/10.0-1.sql"
|
#include "udfs/alter_table_set_access_method/10.0-1.sql"
|
||||||
|
@@ -164,4 +170,48 @@ SELECT * FROM pg_catalog.citus_worker_stat_activity();
 ALTER VIEW citus.citus_worker_stat_activity SET SCHEMA pg_catalog;
 GRANT SELECT ON pg_catalog.citus_worker_stat_activity TO PUBLIC;

+-- copy of citus--10.0-1--10.0-2
+
+#include "../../columnar/sql/columnar--10.0-1--10.0-2.sql"
+
+-- copy of citus--10.0-2--10.0-3
+
+#include "udfs/citus_update_table_statistics/10.0-3.sql"
+
+CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
+    RETURNS VOID
+    LANGUAGE C STRICT
+    AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
+COMMENT ON FUNCTION pg_catalog.master_update_table_statistics(regclass)
+    IS 'updates shard statistics of the given table';
+
+CREATE OR REPLACE FUNCTION pg_catalog.citus_get_active_worker_nodes(OUT node_name text, OUT node_port bigint)
+    RETURNS SETOF record
+    LANGUAGE C STRICT ROWS 100
+    AS 'MODULE_PATHNAME', $$citus_get_active_worker_nodes$$;
+COMMENT ON FUNCTION pg_catalog.citus_get_active_worker_nodes()
+    IS 'fetch set of active worker nodes';
+
+-- copy of citus--10.0-3--10.0-4
+
+-- This migration file aims to fix 2 issues with upgrades on clusters
+
+-- 1. a bug in public schema dependency for citus_tables view.
+--
+-- Users who do not have public schema in their clusters were unable to upgrade
+-- to Citus 10.x due to the citus_tables view that used to be created in public
+-- schema
+
+#include "udfs/citus_tables/10.0-4.sql"
+
+-- 2. a bug in our PG upgrade functions
+--
+-- Users who took the 9.5-2--10.0-1 upgrade path already have the fix, but users
+-- who took the 9.5-1--10.0-1 upgrade path do not. Hence, we repeat the CREATE OR
+-- REPLACE from the 9.5-2 definition for citus_prepare_pg_upgrade.
+
+#include "udfs/citus_prepare_pg_upgrade/9.5-2.sql"
+#include "udfs/citus_finish_pg_upgrade/10.0-4.sql"
+
+
 RESET search_path;
@@ -0,0 +1,3 @@
-- 9.5-1--9.5-2 was added later as a patch to fix a bug in our PG upgrade functions
#include "udfs/citus_prepare_pg_upgrade/9.5-2.sql"
#include "udfs/citus_finish_pg_upgrade/9.5-2.sql"
@@ -0,0 +1,9 @@
--
-- 9.5-1--9.5-2 was added later as a patch to fix a bug in our PG upgrade functions
--
-- This script brings users who installed the patch released back to the 9.5-1
-- upgrade path. We do this via a semantical downgrade since there has already been
-- introduced new changes in the schema from 9.5-1 to 10.0-1. To make sure we include all
-- changes made during that version change we decide to use the existing upgrade path from
-- our later introduced 9.5-1 version.
--
@@ -0,0 +1,7 @@
-- 9.5-2--9.5-3 was added later as a patch to improve master_update_table_statistics
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
    RETURNS VOID
    LANGUAGE C STRICT
    AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
COMMENT ON FUNCTION pg_catalog.master_update_table_statistics(regclass)
    IS 'updates shard statistics of the given table';
@@ -0,0 +1,22 @@
-- citus--9.5-3--9.5-2
-- This is a downgrade path that will revert the changes made in citus--9.5-2--9.5-3.sql
-- 9.5-2--9.5-3 was added later as a patch to improve master_update_table_statistics.
-- We have this downgrade script so that we can continue from the main upgrade path
-- when upgrading to later versions.
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
    RETURNS VOID AS $$
DECLARE
    colocated_tables regclass[];
BEGIN
    SELECT get_colocated_table_array(relation) INTO colocated_tables;

    PERFORM
        master_update_shard_statistics(shardid)
    FROM
        pg_dist_shard
    WHERE
        logicalrelid = ANY (colocated_tables);
END;
$$ LANGUAGE 'plpgsql';
COMMENT ON FUNCTION master_update_table_statistics(regclass)
    IS 'updates shard statistics of the given table and its colocated tables';
|
||||||
-- citus--10.0-1--9.5-1
|
-- citus--10.0-4--9.5-1
|
||||||
|
|
||||||
|
-- This migration file aims to fix the issues with upgrades on clusters without public schema.
|
||||||
|
|
||||||
|
-- This file is created by the following command, and some more changes in a separate commit
|
||||||
|
-- cat citus--10.0-3--10.0-2.sql citus--10.0-2--10.0-1.sql citus--10.0-1--9.5-1.sql > citus--10.0-4--9.5-1.sql
|
||||||
|
|
||||||
|
-- copy of citus--10.0-4--10.0-3
|
||||||
|
--
|
||||||
|
-- 10.0-3--10.0-4 was added later as a patch to fix a bug in our PG upgrade functions
|
||||||
|
--
|
||||||
|
-- The upgrade fixes a bug in citus_(prepare|finish)_pg_upgrade. Given the old versions of
|
||||||
|
-- these functions contain a bug it is better to _not_ restore the old version and keep
|
||||||
|
-- the patched version of the function.
|
||||||
|
--
|
||||||
|
-- This is inline with the downgrade scripts for earlier versions of this patch
|
||||||
|
--
|
||||||
|
|
||||||
|
-- copy of citus--10.0-3--10.0-2
|
||||||
|
-- this is a downgrade path that will revert the changes made in citus--10.0-2--10.0-3.sql
|
||||||
|
|
||||||
|
DROP FUNCTION pg_catalog.citus_update_table_statistics(regclass);
|
||||||
|
|
||||||
|
#include "../udfs/citus_update_table_statistics/10.0-1.sql"
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION master_update_table_statistics(relation regclass)
|
||||||
|
RETURNS VOID AS $$
|
||||||
|
DECLARE
|
||||||
|
colocated_tables regclass[];
|
||||||
|
BEGIN
|
||||||
|
SELECT get_colocated_table_array(relation) INTO colocated_tables;
|
||||||
|
|
||||||
|
PERFORM
|
||||||
|
master_update_shard_statistics(shardid)
|
||||||
|
FROM
|
||||||
|
pg_dist_shard
|
||||||
|
WHERE
|
||||||
|
logicalrelid = ANY (colocated_tables);
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE 'plpgsql';
|
||||||
|
COMMENT ON FUNCTION master_update_table_statistics(regclass)
|
||||||
|
IS 'updates shard statistics of the given table and its colocated tables';
|
||||||
|
|
||||||
|
DROP FUNCTION pg_catalog.citus_get_active_worker_nodes(OUT text, OUT bigint);
|
||||||
|
/* copy of citus--10.0-2--10.0-1.sql */
|
||||||
|
#include "../../../columnar/sql/downgrades/columnar--10.0-2--10.0-1.sql"
|
||||||
|
|
||||||
|
-- copy of citus--10.0-1--9.5-1
|
||||||
|
|
||||||
-- In Citus 10.0, we added another internal udf (notify_constraint_dropped)
|
-- In Citus 10.0, we added another internal udf (notify_constraint_dropped)
|
||||||
-- to be called by citus_drop_trigger. Since this script is executed when
|
-- to be called by citus_drop_trigger. Since this script is executed when
|
||||||
|
@@ -18,7 +65,8 @@ DROP FUNCTION pg_catalog.notify_constraint_dropped();

 #include "../../../columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql"

-DROP VIEW public.citus_tables;
+DROP VIEW IF EXISTS pg_catalog.citus_tables;
+DROP VIEW IF EXISTS public.citus_tables;
 DROP FUNCTION pg_catalog.alter_distributed_table(regclass, text, int, text, boolean);
 DROP FUNCTION pg_catalog.alter_table_set_access_method(regclass, text);
 DROP FUNCTION pg_catalog.citus_total_relation_size(regclass,boolean);
@@ -0,0 +1,108 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade()
    RETURNS void
    LANGUAGE plpgsql
    SET search_path = pg_catalog
    AS $cppu$
DECLARE
    table_name regclass;
    command text;
    trigger_name text;
BEGIN
    --
    -- restore citus catalog tables
    --
    INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition;
    INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard;
    INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement;
    INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata;
    INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node;
    INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group;
    INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction;
    INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation;
    -- enterprise catalog tables
    INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
    INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;

    ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
    INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
        name,
        default_strategy,
        shard_cost_function::regprocedure::regproc,
        node_capacity_function::regprocedure::regproc,
        shard_allowed_on_node_function::regprocedure::regproc,
        default_threshold,
        minimum_threshold
    FROM public.pg_dist_rebalance_strategy;
    ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;

    --
    -- drop backup tables
    --
    DROP TABLE public.pg_dist_authinfo;
    DROP TABLE public.pg_dist_colocation;
    DROP TABLE public.pg_dist_local_group;
    DROP TABLE public.pg_dist_node;
    DROP TABLE public.pg_dist_node_metadata;
    DROP TABLE public.pg_dist_partition;
    DROP TABLE public.pg_dist_placement;
    DROP TABLE public.pg_dist_poolinfo;
    DROP TABLE public.pg_dist_shard;
    DROP TABLE public.pg_dist_transaction;
    DROP TABLE public.pg_dist_rebalance_strategy;

    --
    -- reset sequences
    --
    PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false);
    PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false);
    PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false);
    PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false);
    PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false);

    --
    -- register triggers
    --
    FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition
    LOOP
        trigger_name := 'truncate_trigger_' || table_name::oid;
        command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
        EXECUTE command;
        command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name);
        EXECUTE command;
    END LOOP;

    --
    -- set dependencies
    --
    INSERT INTO pg_depend
    SELECT
        'pg_class'::regclass::oid as classid,
        p.logicalrelid::regclass::oid as objid,
        0 as objsubid,
        'pg_extension'::regclass::oid as refclassid,
        (select oid from pg_extension where extname = 'citus') as refobjid,
        0 as refobjsubid ,
        'n' as deptype
    FROM pg_catalog.pg_dist_partition p;

    -- restore pg_dist_object from the stable identifiers
    TRUNCATE citus.pg_dist_object;
    INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
    SELECT
        address.classid,
        address.objid,
        address.objsubid,
        naming.distribution_argument_index,
        naming.colocationid
    FROM
        public.pg_dist_object naming,
        pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;

    DROP TABLE public.pg_dist_object;

    PERFORM citus_internal.columnar_ensure_objects_exist();
END;
$cppu$;

COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade()
    IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade';
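The COMMENT above describes the intended workflow; a hedged sketch of where this function fits around pg_upgrade:

-- Illustrative only:
SELECT citus_prepare_pg_upgrade();   -- on the old server, before running pg_upgrade
-- ... run pg_upgrade ...
SELECT citus_finish_pg_upgrade();    -- on the new server, after pg_upgrade completes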
@@ -0,0 +1,105 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade()
    RETURNS void
    LANGUAGE plpgsql
    SET search_path = pg_catalog
    AS $cppu$
DECLARE
    table_name regclass;
    command text;
    trigger_name text;
BEGIN
    --
    -- restore citus catalog tables
    --
    INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition;
    INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard;
    INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement;
    INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata;
    INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node;
    INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group;
    INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction;
    INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation;
    -- enterprise catalog tables
    INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
    INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;

    ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
    INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
        name,
        default_strategy,
        shard_cost_function::regprocedure::regproc,
        node_capacity_function::regprocedure::regproc,
        shard_allowed_on_node_function::regprocedure::regproc,
        default_threshold,
        minimum_threshold
    FROM public.pg_dist_rebalance_strategy;
    ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;

    --
    -- drop backup tables
    --
    DROP TABLE public.pg_dist_authinfo;
    DROP TABLE public.pg_dist_colocation;
    DROP TABLE public.pg_dist_local_group;
    DROP TABLE public.pg_dist_node;
    DROP TABLE public.pg_dist_node_metadata;
    DROP TABLE public.pg_dist_partition;
    DROP TABLE public.pg_dist_placement;
    DROP TABLE public.pg_dist_poolinfo;
    DROP TABLE public.pg_dist_shard;
    DROP TABLE public.pg_dist_transaction;

    --
    -- reset sequences
    --
    PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false);
    PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false);
    PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false);
    PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false);
    PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false);

    --
    -- register triggers
    --
    FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition
    LOOP
        trigger_name := 'truncate_trigger_' || table_name::oid;
        command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
        EXECUTE command;
        command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name);
        EXECUTE command;
    END LOOP;

    --
    -- set dependencies
    --
    INSERT INTO pg_depend
    SELECT
        'pg_class'::regclass::oid as classid,
        p.logicalrelid::regclass::oid as objid,
        0 as objsubid,
        'pg_extension'::regclass::oid as refclassid,
        (select oid from pg_extension where extname = 'citus') as refobjid,
        0 as refobjsubid ,
        'n' as deptype
    FROM pg_catalog.pg_dist_partition p;

    -- restore pg_dist_object from the stable identifiers
    TRUNCATE citus.pg_dist_object;
    INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
    SELECT
        address.classid,
        address.objid,
        address.objsubid,
        naming.distribution_argument_index,
        naming.colocationid
    FROM
        public.pg_dist_object naming,
        pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;

    DROP TABLE public.pg_dist_object;
END;
$cppu$;

COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade()
    IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade';
@@ -0,0 +1,106 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade()
    RETURNS void
    LANGUAGE plpgsql
    SET search_path = pg_catalog
    AS $cppu$
DECLARE
    table_name regclass;
    command text;
    trigger_name text;
BEGIN
    --
    -- restore citus catalog tables
    --
    INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition;
    INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard;
    INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement;
    INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata;
    INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node;
    INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group;
    INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction;
    INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation;
    -- enterprise catalog tables
    INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo;
    INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo;

    ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;
    INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT
        name,
        default_strategy,
        shard_cost_function::regprocedure::regproc,
        node_capacity_function::regprocedure::regproc,
        shard_allowed_on_node_function::regprocedure::regproc,
        default_threshold,
        minimum_threshold
    FROM public.pg_dist_rebalance_strategy;
    ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger;

    --
    -- drop backup tables
    --
    DROP TABLE public.pg_dist_authinfo;
    DROP TABLE public.pg_dist_colocation;
    DROP TABLE public.pg_dist_local_group;
    DROP TABLE public.pg_dist_node;
    DROP TABLE public.pg_dist_node_metadata;
    DROP TABLE public.pg_dist_partition;
    DROP TABLE public.pg_dist_placement;
    DROP TABLE public.pg_dist_poolinfo;
    DROP TABLE public.pg_dist_shard;
    DROP TABLE public.pg_dist_transaction;
    DROP TABLE public.pg_dist_rebalance_strategy;

    --
    -- reset sequences
    --
    PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false);
    PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false);
    PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false);
    PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false);
    PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false);

    --
    -- register triggers
    --
    FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition
    LOOP
        trigger_name := 'truncate_trigger_' || table_name::oid;
        command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()';
        EXECUTE command;
        command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name);
        EXECUTE command;
    END LOOP;

    --
    -- set dependencies
    --
    INSERT INTO pg_depend
    SELECT
        'pg_class'::regclass::oid as classid,
        p.logicalrelid::regclass::oid as objid,
        0 as objsubid,
        'pg_extension'::regclass::oid as refclassid,
        (select oid from pg_extension where extname = 'citus') as refobjid,
        0 as refobjsubid ,
        'n' as deptype
    FROM pg_catalog.pg_dist_partition p;

    -- restore pg_dist_object from the stable identifiers
    TRUNCATE citus.pg_dist_object;
    INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
    SELECT
        address.classid,
        address.objid,
        address.objsubid,
        naming.distribution_argument_index,
        naming.colocationid
    FROM
        public.pg_dist_object naming,
        pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;

    DROP TABLE public.pg_dist_object;
END;
$cppu$;

COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade()
    IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade';
@@ -86,17 +86,7 @@ BEGIN
     FROM pg_catalog.pg_dist_partition p;

     -- restore pg_dist_object from the stable identifiers
-    -- DELETE/INSERT to avoid primary key violations
-    WITH old_records AS (
-        DELETE FROM
-            citus.pg_dist_object
-        RETURNING
-            type,
-            object_names,
-            object_args,
-            distribution_argument_index,
-            colocationid
-    )
+    TRUNCATE citus.pg_dist_object;
     INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid)
     SELECT
         address.classid,
@@ -105,8 +95,10 @@ BEGIN
         naming.distribution_argument_index,
         naming.colocationid
     FROM
-        old_records naming,
-        pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;
+        public.pg_dist_object naming,
+        pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address;
+
+    DROP TABLE public.pg_dist_object;

     PERFORM citus_internal.columnar_ensure_objects_exist();
 END;
@@ -0,0 +1,44 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_prepare_pg_upgrade()
    RETURNS void
    LANGUAGE plpgsql
    SET search_path = pg_catalog
    AS $cppu$
BEGIN
    --
    -- backup citus catalog tables
    --
    CREATE TABLE public.pg_dist_partition AS SELECT * FROM pg_catalog.pg_dist_partition;
    CREATE TABLE public.pg_dist_shard AS SELECT * FROM pg_catalog.pg_dist_shard;
    CREATE TABLE public.pg_dist_placement AS SELECT * FROM pg_catalog.pg_dist_placement;
    CREATE TABLE public.pg_dist_node_metadata AS SELECT * FROM pg_catalog.pg_dist_node_metadata;
    CREATE TABLE public.pg_dist_node AS SELECT * FROM pg_catalog.pg_dist_node;
    CREATE TABLE public.pg_dist_local_group AS SELECT * FROM pg_catalog.pg_dist_local_group;
    CREATE TABLE public.pg_dist_transaction AS SELECT * FROM pg_catalog.pg_dist_transaction;
    CREATE TABLE public.pg_dist_colocation AS SELECT * FROM pg_catalog.pg_dist_colocation;
    -- enterprise catalog tables
    CREATE TABLE public.pg_dist_authinfo AS SELECT * FROM pg_catalog.pg_dist_authinfo;
    CREATE TABLE public.pg_dist_poolinfo AS SELECT * FROM pg_catalog.pg_dist_poolinfo;
    CREATE TABLE public.pg_dist_rebalance_strategy AS SELECT
        name,
        default_strategy,
        shard_cost_function::regprocedure::text,
        node_capacity_function::regprocedure::text,
        shard_allowed_on_node_function::regprocedure::text,
        default_threshold,
        minimum_threshold
    FROM pg_catalog.pg_dist_rebalance_strategy;

    -- store upgrade stable identifiers on pg_dist_object catalog
    CREATE TABLE public.pg_dist_object AS SELECT
        address.type,
        address.object_names,
        address.object_args,
        objects.distribution_argument_index,
        objects.colocationid
    FROM citus.pg_dist_object objects,
        pg_catalog.pg_identify_object_as_address(objects.classid, objects.objid, objects.objsubid) address;
END;
$cppu$;

COMMENT ON FUNCTION pg_catalog.citus_prepare_pg_upgrade()
    IS 'perform tasks to copy citus settings to a location that could later be restored after pg_upgrade is done';
@@ -0,0 +1,60 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_prepare_pg_upgrade()
    RETURNS void
    LANGUAGE plpgsql
    SET search_path = pg_catalog
    AS $cppu$
BEGIN
    --
    -- Drop existing backup tables
    --
    DROP TABLE IF EXISTS public.pg_dist_partition;
    DROP TABLE IF EXISTS public.pg_dist_shard;
    DROP TABLE IF EXISTS public.pg_dist_placement;
    DROP TABLE IF EXISTS public.pg_dist_node_metadata;
    DROP TABLE IF EXISTS public.pg_dist_node;
    DROP TABLE IF EXISTS public.pg_dist_local_group;
    DROP TABLE IF EXISTS public.pg_dist_transaction;
    DROP TABLE IF EXISTS public.pg_dist_colocation;
    DROP TABLE IF EXISTS public.pg_dist_authinfo;
    DROP TABLE IF EXISTS public.pg_dist_poolinfo;
    DROP TABLE IF EXISTS public.pg_dist_rebalance_strategy;
    DROP TABLE IF EXISTS public.pg_dist_object;

    --
    -- backup citus catalog tables
    --
    CREATE TABLE public.pg_dist_partition AS SELECT * FROM pg_catalog.pg_dist_partition;
    CREATE TABLE public.pg_dist_shard AS SELECT * FROM pg_catalog.pg_dist_shard;
    CREATE TABLE public.pg_dist_placement AS SELECT * FROM pg_catalog.pg_dist_placement;
    CREATE TABLE public.pg_dist_node_metadata AS SELECT * FROM pg_catalog.pg_dist_node_metadata;
    CREATE TABLE public.pg_dist_node AS SELECT * FROM pg_catalog.pg_dist_node;
    CREATE TABLE public.pg_dist_local_group AS SELECT * FROM pg_catalog.pg_dist_local_group;
    CREATE TABLE public.pg_dist_transaction AS SELECT * FROM pg_catalog.pg_dist_transaction;
    CREATE TABLE public.pg_dist_colocation AS SELECT * FROM pg_catalog.pg_dist_colocation;
    -- enterprise catalog tables
    CREATE TABLE public.pg_dist_authinfo AS SELECT * FROM pg_catalog.pg_dist_authinfo;
    CREATE TABLE public.pg_dist_poolinfo AS SELECT * FROM pg_catalog.pg_dist_poolinfo;
    CREATE TABLE public.pg_dist_rebalance_strategy AS SELECT
        name,
        default_strategy,
        shard_cost_function::regprocedure::text,
        node_capacity_function::regprocedure::text,
        shard_allowed_on_node_function::regprocedure::text,
        default_threshold,
        minimum_threshold
    FROM pg_catalog.pg_dist_rebalance_strategy;

    -- store upgrade stable identifiers on pg_dist_object catalog
    CREATE TABLE public.pg_dist_object AS SELECT
        address.type,
        address.object_names,
        address.object_args,
        objects.distribution_argument_index,
        objects.colocationid
    FROM citus.pg_dist_object objects,
        pg_catalog.pg_identify_object_as_address(objects.classid, objects.objid, objects.objsubid) address;
END;
$cppu$;

COMMENT ON FUNCTION pg_catalog.citus_prepare_pg_upgrade()
    IS 'perform tasks to copy citus settings to a location that could later be restored after pg_upgrade is done';
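Read together with the citus_finish_pg_upgrade definitions earlier in this diff, these two UDFs bracket a major-version upgrade: the prepare step copies the Citus catalogs into backup tables in the public schema before pg_upgrade runs, and the finish step restores them afterwards. A minimal sketch of the intended call sequence follows (the exact connection targets and the placement of pg_upgrade between the two calls are illustrative, not spelled out in the diff):

-- on the coordinator of the old cluster, immediately before running pg_upgrade
SELECT citus_prepare_pg_upgrade();

-- run pg_upgrade against the old and new data directories, then start the new server

-- on the upgraded cluster, before accepting application traffic again
SELECT citus_finish_pg_upgrade();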
@@ -18,6 +18,7 @@ BEGIN
     DROP TABLE IF EXISTS public.pg_dist_authinfo;
     DROP TABLE IF EXISTS public.pg_dist_poolinfo;
     DROP TABLE IF EXISTS public.pg_dist_rebalance_strategy;
+    DROP TABLE IF EXISTS public.pg_dist_object;

     --
     -- backup citus catalog tables
@@ -44,8 +45,14 @@ BEGIN
     FROM pg_catalog.pg_dist_rebalance_strategy;

     -- store upgrade stable identifiers on pg_dist_object catalog
-    UPDATE citus.pg_dist_object
-       SET (type, object_names, object_args) = (SELECT * FROM pg_identify_object_as_address(classid, objid, objsubid));
+    CREATE TABLE public.pg_dist_object AS SELECT
+        address.type,
+        address.object_names,
+        address.object_args,
+        objects.distribution_argument_index,
+        objects.colocationid
+    FROM citus.pg_dist_object objects,
+        pg_catalog.pg_identify_object_as_address(objects.classid, objects.objid, objects.objsubid) address;
 END;
 $cppu$;
@@ -0,0 +1,38 @@
DO $$
declare
citus_tables_create_query text;
BEGIN
citus_tables_create_query=$CTCQ$
    CREATE OR REPLACE VIEW %I.citus_tables AS
    SELECT
        logicalrelid AS table_name,
        CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE 'reference' END AS citus_table_type,
        coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column,
        colocationid AS colocation_id,
        pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size,
        (select count(*) from pg_dist_shard where logicalrelid = p.logicalrelid) AS shard_count,
        pg_get_userbyid(relowner) AS table_owner,
        amname AS access_method
    FROM
        pg_dist_partition p
    JOIN
        pg_class c ON (p.logicalrelid = c.oid)
    LEFT JOIN
        pg_am a ON (a.oid = c.relam)
    WHERE
        partkey IS NOT NULL OR repmodel = 't'
    ORDER BY
        logicalrelid::text;
$CTCQ$;

IF EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = 'public') THEN
    EXECUTE format(citus_tables_create_query, 'public');
    GRANT SELECT ON public.citus_tables TO public;
ELSE
    EXECUTE format(citus_tables_create_query, 'citus');
    ALTER VIEW citus.citus_tables SET SCHEMA pg_catalog;
    GRANT SELECT ON pg_catalog.citus_tables TO public;
END IF;

END;
$$;
@@ -1,20 +1,38 @@
-CREATE VIEW public.citus_tables AS
-SELECT
-    logicalrelid AS table_name,
-    CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE 'reference' END AS citus_table_type,
-    coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column,
-    colocationid AS colocation_id,
-    pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size,
-    (select count(*) from pg_dist_shard where logicalrelid = p.logicalrelid) AS shard_count,
-    pg_get_userbyid(relowner) AS table_owner,
-    amname AS access_method
-FROM
-    pg_dist_partition p
-JOIN
-    pg_class c ON (p.logicalrelid = c.oid)
-LEFT JOIN
-    pg_am a ON (a.oid = c.relam)
-WHERE
-    partkey IS NOT NULL OR repmodel = 't'
-ORDER BY
-    logicalrelid::text;
+DO $$
+declare
+citus_tables_create_query text;
+BEGIN
+citus_tables_create_query=$CTCQ$
+    CREATE OR REPLACE VIEW %I.citus_tables AS
+    SELECT
+        logicalrelid AS table_name,
+        CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE 'reference' END AS citus_table_type,
+        coalesce(column_to_column_name(logicalrelid, partkey), '<none>') AS distribution_column,
+        colocationid AS colocation_id,
+        pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size,
+        (select count(*) from pg_dist_shard where logicalrelid = p.logicalrelid) AS shard_count,
+        pg_get_userbyid(relowner) AS table_owner,
+        amname AS access_method
+    FROM
+        pg_dist_partition p
+    JOIN
+        pg_class c ON (p.logicalrelid = c.oid)
+    LEFT JOIN
+        pg_am a ON (a.oid = c.relam)
+    WHERE
+        partkey IS NOT NULL OR repmodel = 't'
+    ORDER BY
+        logicalrelid::text;
+$CTCQ$;
+
+IF EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = 'public') THEN
+    EXECUTE format(citus_tables_create_query, 'public');
+    GRANT SELECT ON public.citus_tables TO public;
+ELSE
+    EXECUTE format(citus_tables_create_query, 'citus');
+    ALTER VIEW citus.citus_tables SET SCHEMA pg_catalog;
+    GRANT SELECT ON pg_catalog.citus_tables TO public;
+END IF;
+
+END;
+$$;
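For a quick sanity check of the view installed by this DO block, a query such as the following should list every distributed and reference table with the columns defined above (the output obviously depends on the cluster it runs against):

SELECT table_name, citus_table_type, distribution_column, shard_count, table_size
FROM citus_tables
ORDER BY table_name;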
@@ -0,0 +1,6 @@
CREATE OR REPLACE FUNCTION pg_catalog.citus_update_table_statistics(relation regclass)
    RETURNS VOID
    LANGUAGE C STRICT
    AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
COMMENT ON FUNCTION pg_catalog.citus_update_table_statistics(regclass)
    IS 'updates shard statistics of the given table';
@@ -1,17 +1,6 @@
-CREATE FUNCTION pg_catalog.citus_update_table_statistics(relation regclass)
-RETURNS VOID AS $$
-DECLARE
-    colocated_tables regclass[];
-BEGIN
-    SELECT get_colocated_table_array(relation) INTO colocated_tables;
-
-    PERFORM
-        master_update_shard_statistics(shardid)
-    FROM
-        pg_dist_shard
-    WHERE
-        logicalrelid = ANY (colocated_tables);
-END;
-$$ LANGUAGE 'plpgsql';
+CREATE OR REPLACE FUNCTION pg_catalog.citus_update_table_statistics(relation regclass)
+    RETURNS VOID
+    LANGUAGE C STRICT
+    AS 'MODULE_PATHNAME', $$citus_update_table_statistics$$;
 COMMENT ON FUNCTION pg_catalog.citus_update_table_statistics(regclass)
-    IS 'updates shard statistics of the given table and its colocated tables';
+    IS 'updates shard statistics of the given table';
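The plpgsql wrapper is replaced by a C implementation here, but the SQL-level signature stays the same, so existing calls keep working. For example (the table name is hypothetical):

SELECT citus_update_table_statistics('github_events'::regclass);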
@@ -5,12 +5,13 @@ FROM (
     FROM pg_class c
     JOIN pg_inherits i ON (c.oid = inhrelid)
     JOIN pg_partitioned_table p ON (inhparent = partrelid)
-    JOIN pg_attribute a ON (partrelid = attrelid AND ARRAY[attnum] <@ string_to_array(partattrs::text, ' ')::int2[])
+    JOIN pg_attribute a ON (partrelid = attrelid)
     JOIN pg_type t ON (atttypid = t.oid)
     JOIN pg_namespace tn ON (t.typnamespace = tn.oid)
     LEFT JOIN pg_am am ON (c.relam = am.oid),
     pg_catalog.time_partition_range(c.oid)
     WHERE c.relpartbound IS NOT NULL AND p.partstrat = 'r' AND p.partnatts = 1
+    AND a.attnum = ANY(partattrs::int2[])
 ) partitions
 ORDER BY partrelid::text, lower_bound;
@@ -5,12 +5,13 @@ FROM (
     FROM pg_class c
     JOIN pg_inherits i ON (c.oid = inhrelid)
     JOIN pg_partitioned_table p ON (inhparent = partrelid)
-    JOIN pg_attribute a ON (partrelid = attrelid AND ARRAY[attnum] <@ string_to_array(partattrs::text, ' ')::int2[])
+    JOIN pg_attribute a ON (partrelid = attrelid)
     JOIN pg_type t ON (atttypid = t.oid)
     JOIN pg_namespace tn ON (t.typnamespace = tn.oid)
     LEFT JOIN pg_am am ON (c.relam = am.oid),
     pg_catalog.time_partition_range(c.oid)
     WHERE c.relpartbound IS NOT NULL AND p.partstrat = 'r' AND p.partnatts = 1
+    AND a.attnum = ANY(partattrs::int2[])
 ) partitions
 ORDER BY partrelid::text, lower_bound;
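Both copies of the view receive the same fix: the partition column is now matched with a.attnum = ANY(partattrs::int2[]) in the WHERE clause instead of inside the join condition. A simple way to eyeball the result after applying the change (the view's columns follow the definition above):

SELECT * FROM time_partitions;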
@@ -15,6 +15,7 @@

 #include "catalog/pg_type.h"
 #include "distributed/connection_management.h"
+#include "distributed/intermediate_result_pruning.h"
 #include "distributed/listutils.h"
 #include "distributed/maintenanced.h"
 #include "distributed/metadata_sync.h"
@@ -104,7 +105,7 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS)
     }

     MultiConnection *connection = GetNodeConnection(FORCE_NEW_CONNECTION,
-                                                    "localhost", PostPortNumber);
+                                                    LOCAL_HOST_NAME, PostPortNumber);
     ExecuteCriticalRemoteCommand(connection, "LISTEN " METADATA_SYNC_CHANNEL);

     int waitFlags = WL_SOCKET_READABLE | WL_TIMEOUT | WL_POSTMASTER_DEATH;
@@ -36,12 +36,13 @@ create_progress(PG_FUNCTION_ARGS)
 {
     uint64 magicNumber = PG_GETARG_INT64(0);
     int stepCount = PG_GETARG_INT32(1);
-    ProgressMonitorData *monitor = CreateProgressMonitor(magicNumber, stepCount,
-                                                         sizeof(uint64), 0);
+    dsm_handle dsmHandle;
+    ProgressMonitorData *monitor = CreateProgressMonitor(stepCount,
+                                                         sizeof(uint64), &dsmHandle);

     if (monitor != NULL)
     {
-        uint64 *steps = (uint64 *) monitor->steps;
+        uint64 *steps = (uint64 *) ProgressMonitorSteps(monitor);

         int i = 0;
         for (; i < stepCount; i++)
@@ -50,6 +51,7 @@ create_progress(PG_FUNCTION_ARGS)
         }
     }

+    RegisterProgressMonitor(magicNumber, 0, dsmHandle);
     PG_RETURN_VOID();
 }

@@ -64,7 +66,7 @@ update_progress(PG_FUNCTION_ARGS)

     if (monitor != NULL && step < monitor->stepCount)
     {
-        uint64 *steps = (uint64 *) monitor->steps;
+        uint64 *steps = (uint64 *) ProgressMonitorSteps(monitor);
         steps[step] = newValue;
     }

@@ -93,7 +95,7 @@ show_progress(PG_FUNCTION_ARGS)
     ProgressMonitorData *monitor = NULL;
     foreach_ptr(monitor, monitorList)
     {
-        uint64 *steps = monitor->steps;
+        uint64 *steps = ProgressMonitorSteps(monitor);

         for (int stepIndex = 0; stepIndex < monitor->stepCount; stepIndex++)
         {
@@ -17,10 +17,10 @@

 #include "access/xact.h"
 #include "distributed/connection_management.h"
+#include "distributed/coordinator_protocol.h"
 #include "distributed/function_utils.h"
 #include "distributed/intermediate_result_pruning.h"
 #include "distributed/lock_graph.h"
-#include "distributed/coordinator_protocol.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/remote_commands.h"
 #include "distributed/run_from_same_connection.h"
@@ -18,9 +18,14 @@
 #include "miscadmin.h"
 #include "pgstat.h"

+#include "distributed/transaction_management.h"
+
+
 static Size MemoryContextTotalSpace(MemoryContext context);

 PG_FUNCTION_INFO_V1(top_transaction_context_size);
+PG_FUNCTION_INFO_V1(coordinated_transaction_should_use_2PC);
+

 /*
  * top_transaction_context_size returns current size of TopTransactionContext.
@@ -54,3 +59,20 @@ MemoryContextTotalSpace(MemoryContext context)

     return totalSpace;
 }
+
+
+/*
+ * coordinated_transaction_should_use_2PC returns true if the transaction is in a
+ * coordinated transaction and uses 2PC. If the transaction is not in a
+ * coordinated transaction, the function throws an error.
+ */
+Datum
+coordinated_transaction_should_use_2PC(PG_FUNCTION_ARGS)
+{
+    if (!InCoordinatedTransaction())
+    {
+        ereport(ERROR, (errmsg("The transaction is not a coordinated transaction")));
+    }
+
+    PG_RETURN_BOOL(GetCoordinatedTransactionShouldUse2PC());
+}
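The backing C function above is only useful once it is exposed as a SQL-callable UDF; the regression tests presumably do that with a wrapper along these lines (the CREATE FUNCTION statement below is an assumption for illustration, not part of this diff):

-- hypothetical SQL wrapper for the test-only C function shown above
CREATE OR REPLACE FUNCTION coordinated_transaction_should_use_2pc()
    RETURNS bool
    LANGUAGE C STRICT
    AS 'citus', $$coordinated_transaction_should_use_2PC$$;

BEGIN;
-- ... issue a distributed modification here ...
SELECT coordinated_transaction_should_use_2pc();
COMMIT;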
@@ -793,7 +793,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access
                                "foreign keys. Any parallel modification to "
                                "those hash distributed tables in the same "
                                "transaction can only be executed in sequential query "
-                               "execution mode", relationName)));
+                               "execution mode",
+                               relationName != NULL ? relationName : "<dropped>")));

         /*
          * Switching to sequential mode is admittedly confusing and, could be useless
@@ -20,6 +20,7 @@
 #include "distributed/connection_management.h"
 #include "distributed/listutils.h"
 #include "distributed/metadata_cache.h"
+#include "distributed/placement_connection.h"
 #include "distributed/remote_commands.h"
 #include "distributed/remote_transaction.h"
 #include "distributed/transaction_identifier.h"
@@ -782,8 +783,16 @@ CoordinatedRemoteTransactionsPrepare(void)
             continue;
         }

-        StartRemoteTransactionPrepare(connection);
-        connectionList = lappend(connectionList, connection);
+        /*
+         * Check if any DML or DDL is executed over the connection on any
+         * placement/table. If yes, we start preparing the transaction, otherwise
+         * we skip prepare since the connection didn't perform any write (read-only)
+         */
+        if (ConnectionModifiedPlacement(connection))
+        {
+            StartRemoteTransactionPrepare(connection);
+            connectionList = lappend(connectionList, connection);
+        }
     }

     bool raiseInterrupts = true;
@@ -798,6 +807,10 @@ CoordinatedRemoteTransactionsPrepare(void)

         if (transaction->transactionState != REMOTE_TRANS_PREPARING)
         {
+            /*
+             * Verify that the connection didn't modify any placement
+             */
+            Assert(!ConnectionModifiedPlacement(connection));
             continue;
         }
@@ -96,9 +96,16 @@ MemoryContext CommitContext = NULL;
 /*
  * Should this coordinated transaction use 2PC? Set by
  * CoordinatedTransactionUse2PC(), e.g. if DDL was issued and
- * MultiShardCommitProtocol was set to 2PC.
+ * MultiShardCommitProtocol was set to 2PC. But, even if this
+ * flag is set, the transaction manager is smart enough to only
+ * do 2PC on the remote connections that did a modification.
+ *
+ * As a variable name ShouldCoordinatedTransactionUse2PC could
+ * be improved. We use CoordinatedTransactionShouldUse2PC() as the
+ * public API function, hence couldn't come up with a better name
+ * for the underlying variable at the moment.
  */
-bool CoordinatedTransactionUses2PC = false;
+bool ShouldCoordinatedTransactionUse2PC = false;

 /* if disabled, distributed statements in a function may run as separate transactions */
 bool FunctionOpensTransactionBlock = true;
@@ -183,15 +190,29 @@ InCoordinatedTransaction(void)


 /*
- * CoordinatedTransactionUse2PC() signals that the current coordinated
+ * CoordinatedTransactionShouldUse2PC() signals that the current coordinated
  * transaction should use 2PC to commit.
+ *
+ * Note that even if 2PC is enabled, it is only used for connections that make
+ * modification (DML or DDL).
  */
 void
-CoordinatedTransactionUse2PC(void)
+CoordinatedTransactionShouldUse2PC(void)
 {
     Assert(InCoordinatedTransaction());

-    CoordinatedTransactionUses2PC = true;
+    ShouldCoordinatedTransactionUse2PC = true;
+}
+
+
+/*
+ * GetCoordinatedTransactionShouldUse2PC is a wrapper function to read the value
+ * of CoordinatedTransactionShouldUse2PCFlag.
+ */
+bool
+GetCoordinatedTransactionShouldUse2PC(void)
+{
+    return ShouldCoordinatedTransactionUse2PC;
 }
@@ -297,28 +318,8 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
             /* stop propagating notices from workers, we know the query is failed */
             DisableWorkerMessagePropagation();

-            /*
-             * FIXME: Add warning for the COORD_TRANS_COMMITTED case. That
-             * can be reached if this backend fails after the
-             * XACT_EVENT_PRE_COMMIT state.
-             */
-
-            /*
-             * Call other parts of citus that need to integrate into
-             * transaction management. Do so before doing other work, so the
-             * callbacks still can perform work if needed.
-             */
-            {
-                /*
-                 * On Windows it's not possible to delete a file before you've closed all
-                 * handles to it (rmdir will return success but not take effect). Since
-                 * we're in an ABORT handler it's very likely that not all handles have
-                 * been closed; force them closed here before running
-                 * RemoveIntermediateResultsDirectory.
-                 */
-                AtEOXact_Files(false);
-                RemoveIntermediateResultsDirectory();
-            }
+            RemoveIntermediateResultsDirectory();
+
             ResetShardPlacementTransactionState();

             /* handles both already prepared and open transactions */
@@ -425,7 +426,7 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
              */
             MarkFailedShardPlacements();

-            if (CoordinatedTransactionUses2PC)
+            if (ShouldCoordinatedTransactionUse2PC)
             {
                 CoordinatedRemoteTransactionsPrepare();
                 CurrentCoordinatedTransactionState = COORD_TRANS_PREPARED;
@@ -453,7 +454,7 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
              * Check again whether shards/placement successfully
              * committed. This handles failure at COMMIT/PREPARE time.
              */
-            PostCommitMarkFailedShardPlacements(CoordinatedTransactionUses2PC);
+            PostCommitMarkFailedShardPlacements(ShouldCoordinatedTransactionUse2PC);
             break;
         }

@@ -485,7 +486,7 @@ ResetGlobalVariables()
     FreeSavedExplainPlan();
     dlist_init(&InProgressTransactions);
     activeSetStmts = NULL;
-    CoordinatedTransactionUses2PC = false;
+    ShouldCoordinatedTransactionUse2PC = false;
     TransactionModifiedNodeMetadata = false;
     MetadataSyncOnCommit = false;
     ResetWorkerErrorIndication();
@@ -96,7 +96,7 @@ SendCommandToWorkerAsUser(const char *nodeName, int32 nodePort, const char *node
     uint32 connectionFlags = 0;

     UseCoordinatedTransaction();
-    CoordinatedTransactionUse2PC();
+    CoordinatedTransactionShouldUse2PC();

     MultiConnection *transactionConnection = GetNodeUserDatabaseConnection(
         connectionFlags, nodeName,
@@ -404,7 +404,7 @@ SendCommandToWorkersParamsInternal(TargetWorkerSet targetWorkerSet, const char *
     List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);

     UseCoordinatedTransaction();
-    CoordinatedTransactionUse2PC();
+    CoordinatedTransactionShouldUse2PC();

     /* open connections in parallel */
     WorkerNode *workerNode = NULL;
@@ -18,6 +18,7 @@
 #include "access/htup_details.h"
 #include "distributed/distribution_column.h"
 #include "distributed/metadata_cache.h"
+#include "distributed/multi_partitioning_utils.h"
 #include "distributed/version_compat.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodes.h"
@@ -115,6 +116,53 @@ column_to_column_name(PG_FUNCTION_ARGS)
 }


+/*
+ * FindColumnWithNameOnTargetRelation gets a source table and
+ * column name. The function returns the column with the
+ * same name on the target table.
+ *
+ * Note that due to dropping columns, the parent's distribution key may not
+ * match the partition's distribution key. See issue #5123.
+ *
+ * The function throws error if the input or output is not valid or does
+ * not exist.
+ */
+Var *
+FindColumnWithNameOnTargetRelation(Oid sourceRelationId, char *sourceColumnName,
+                                   Oid targetRelationId)
+{
+    if (sourceColumnName == NULL || sourceColumnName[0] == '\0')
+    {
+        ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN),
+                        errmsg("cannot find the given column on table \"%s\"",
+                               generate_qualified_relation_name(sourceRelationId))));
+    }
+
+    AttrNumber attributeNumberOnTarget = get_attnum(targetRelationId, sourceColumnName);
+    if (attributeNumberOnTarget == InvalidAttrNumber)
+    {
+        ereport(ERROR, (errmsg("Column \"%s\" does not exist on "
+                               "relation \"%s\"", sourceColumnName,
+                               get_rel_name(targetRelationId))));
+    }
+
+    Index varNo = 1;
+    Oid targetTypeId = InvalidOid;
+    int32 targetTypMod = 0;
+    Oid targetCollation = InvalidOid;
+    Index varlevelsup = 0;
+
+    /* this function throws error in case anything goes wrong */
+    get_atttypetypmodcoll(targetRelationId, attributeNumberOnTarget,
+                          &targetTypeId, &targetTypMod, &targetCollation);
+    Var *targetColumn =
+        makeVar(varNo, attributeNumberOnTarget, targetTypeId, targetTypMod,
+                targetCollation, varlevelsup);
+
+    return targetColumn;
+}
+
+
 /*
  * BuildDistributionKeyFromColumnName builds a simple distribution key consisting
  * only out of a reference to the column of name columnName. Errors out if the
@@ -100,9 +100,6 @@ static ForeignConstraintRelationshipNode * CreateOrFindNode(HTAB *adjacencyLists
                                                             relid);
 static List * GetConnectedListHelper(ForeignConstraintRelationshipNode *node,
                                      bool isReferencing);
-static HTAB * CreateOidVisitedHashSet(void);
-static bool OidVisited(HTAB *oidVisitedMap, Oid oid);
-static void VisitOid(HTAB *oidVisitedMap, Oid oid);
 static List * GetForeignConstraintRelationshipHelper(Oid relationId, bool isReferencing);


@@ -442,7 +439,7 @@ GetConnectedListHelper(ForeignConstraintRelationshipNode *node, bool isReferenci
  * As hash_create allocates memory in heap, callers are responsible to call
  * hash_destroy when appropriate.
  */
-static HTAB *
+HTAB *
 CreateOidVisitedHashSet(void)
 {
     HASHCTL info = { 0 };
@@ -464,7 +461,7 @@ CreateOidVisitedHashSet(void)
 /*
  * OidVisited returns true if given oid is visited according to given oid hash-set.
  */
-static bool
+bool
 OidVisited(HTAB *oidVisitedMap, Oid oid)
 {
     bool found = false;
@@ -476,7 +473,7 @@ OidVisited(HTAB *oidVisitedMap, Oid oid)
 /*
  * VisitOid sets given oid as visited in given hash-set.
  */
-static void
+void
 VisitOid(HTAB *oidVisitedMap, Oid oid)
 {
     bool found = false;
@@ -644,7 +644,8 @@ CitusMaintenanceDaemonMain(Datum main_arg)
              */
             lastShardCleanTime = GetCurrentTimestamp();

-            numberOfDroppedShards = TryDropMarkedShards();
+            bool waitForCleanupLock = false;
+            numberOfDroppedShards = TryDropMarkedShards(waitForCleanupLock);
         }

         CommitTransactionCommand();
@@ -548,13 +548,14 @@ PartitionParentOid(Oid partitionOid)


 /*
- * LongestPartitionName is a uitility function that returns the partition
- * name which is the longest in terms of number of characters.
+ * PartitionWithLongestNameRelationId is a utility function that returns the
+ * oid of the partition table that has the longest name in terms of number of
+ * characters.
  */
-char *
-LongestPartitionName(Oid parentRelationId)
+Oid
+PartitionWithLongestNameRelationId(Oid parentRelationId)
 {
-    char *longestName = NULL;
+    Oid longestNamePartitionId = InvalidOid;
     int longestNameLength = 0;
     List *partitionList = PartitionList(parentRelationId);

@@ -565,12 +566,12 @@ LongestPartitionName(Oid parentRelationId)
         int partitionNameLength = strnlen(partitionName, NAMEDATALEN);
         if (partitionNameLength > longestNameLength)
         {
-            longestName = partitionName;
+            longestNamePartitionId = partitionRelationId;
             longestNameLength = partitionNameLength;
         }
     }

-    return longestName;
+    return longestNamePartitionId;
 }
@@ -193,7 +193,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode)
         int connectionFlags = OUTSIDE_TRANSACTION;

         MultiConnection *connection = GetNodeUserDatabaseConnection(
-            connectionFlags, "localhost", PostPortNumber,
+            connectionFlags, LocalHostName, PostPortNumber,
             userName, NULL);

         if (PQstatus(connection->pgConn) == CONNECTION_OK)
@@ -387,6 +387,37 @@ SetLocktagForShardDistributionMetadata(int64 shardId, LOCKTAG *tag)
 }


+/*
+ * LockPlacementCleanup takes an exclusive lock to ensure that only one process
+ * can cleanup placements at the same time.
+ */
+void
+LockPlacementCleanup(void)
+{
+    LOCKTAG tag;
+    const bool sessionLock = false;
+    const bool dontWait = false;
+    SET_LOCKTAG_PLACEMENT_CLEANUP(tag);
+    (void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
+}
+
+
+/*
+ * TryLockPlacementCleanup takes an exclusive lock to ensure that only one
+ * process can cleanup placements at the same time.
+ */
+bool
+TryLockPlacementCleanup(void)
+{
+    LOCKTAG tag;
+    const bool sessionLock = false;
+    const bool dontWait = true;
+    SET_LOCKTAG_PLACEMENT_CLEANUP(tag);
+    bool lockAcquired = LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
+    return lockAcquired;
+}
+
+
 /*
  * LockReferencedReferenceShardDistributionMetadata acquires shard distribution
  * metadata locks with the given lock mode on the reference tables which has a
@@ -502,8 +533,6 @@ LockShardResource(uint64 shardId, LOCKMODE lockmode)
     const bool sessionLock = false;
     const bool dontWait = false;

-    AssertArg(shardId != INVALID_SHARD_ID);
-
     SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId);

     (void) LockAcquire(&tag, lockmode, sessionLock, dontWait);
@@ -41,7 +41,7 @@ alter_role_if_exists(PG_FUNCTION_ARGS)

     Node *parseTree = ParseTreeNode(utilityQuery);

-    ProcessUtilityParseTree(parseTree, utilityQuery, PROCESS_UTILITY_TOPLEVEL, NULL,
+    ProcessUtilityParseTree(parseTree, utilityQuery, PROCESS_UTILITY_QUERY, NULL,
                             None_Receiver, NULL);

     PG_RETURN_BOOL(true);
@@ -98,7 +98,7 @@ worker_create_or_alter_role(PG_FUNCTION_ARGS)

         ProcessUtilityParseTree(parseTree,
                                 createRoleUtilityQuery,
-                                PROCESS_UTILITY_TOPLEVEL,
+                                PROCESS_UTILITY_QUERY,
                                 NULL,
                                 None_Receiver, NULL);

@@ -126,7 +126,7 @@ worker_create_or_alter_role(PG_FUNCTION_ARGS)

         ProcessUtilityParseTree(parseTree,
                                 alterRoleUtilityQuery,
-                                PROCESS_UTILITY_TOPLEVEL,
+                                PROCESS_UTILITY_QUERY,
                                 NULL,
                                 None_Receiver, NULL);
@@ -11,10 +11,17 @@

 #include "postgres.h"

+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/fmgrprotos.h"
 #include "utils/lsyscache.h"
+#include "distributed/coordinator_protocol.h"
+#include "distributed/metadata_utility.h"
 #include "distributed/relay_utility.h"
 #include "distributed/shard_utils.h"

+static int GetLargestShardId(void);
+
 /*
  * GetTableLocalShardOid returns the oid of the shard from the given distributed
  * relation with the shardId.
@@ -36,3 +43,81 @@ GetTableLocalShardOid(Oid citusTableOid, uint64 shardId)

     return shardRelationOid;
 }
+
+
+/*
+ * GetLongestShardName is a utility function that returns the name of the shard of a
+ * table that has the longest name in terms of number of characters.
+ *
+ * Both the Oid and name of the table are required so we can create longest shard names
+ * after a RENAME.
+ */
+char *
+GetLongestShardName(Oid citusTableOid, char *finalRelationName)
+{
+    char *longestShardName = pstrdup(finalRelationName);
+    ShardInterval *shardInterval = LoadShardIntervalWithLongestShardName(citusTableOid);
+    AppendShardIdToName(&longestShardName, shardInterval->shardId);
+
+    return longestShardName;
+}
+
+
+/*
+ * GetLongestShardNameForLocalPartition is a utility function that creates a hypothetical shard
+ * name for a partition table that is not distributed yet.
+ */
+char *
+GetLongestShardNameForLocalPartition(Oid parentTableOid, char *partitionRelationName)
+{
+    char *longestShardName = pstrdup(partitionRelationName);
+    CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(parentTableOid);
+    int shardIntervalCount = cacheEntry->shardIntervalArrayLength;
+    int newShardId = GetLargestShardId() + shardIntervalCount;
+    AppendShardIdToName(&longestShardName, newShardId);
+
+    return longestShardName;
+}
+
+
+/*
+ * GetLargestShardId returns the biggest shard id, and returns a 10^6 in case of failure
+ * to get the last value from the sequence.
+ */
+int
+GetLargestShardId()
+{
+    Oid savedUserId = InvalidOid;
+    int savedSecurityContext = 0;
+
+    GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
+    SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
+
+    text *sequenceName = cstring_to_text(SHARDID_SEQUENCE_NAME);
+    Oid sequenceId = ResolveRelationId(sequenceName, false);
+    Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
+
+    volatile int64 largestShardId = 0;
+
+    /*
+     * pg_sequence_last_value() returns NULL if the sequence value is not yet used.
+     * DirectFunctionCall1() gives an ERROR message on NULL return values, and that's why we
+     * need a PG_TRY block.
+     */
+    PG_TRY();
+    {
+        Datum lastShardIdDatum = DirectFunctionCall1(pg_sequence_last_value,
+                                                     sequenceIdDatum);
+        largestShardId = DatumGetInt64(lastShardIdDatum);
+    }
+    PG_CATCH();
+    {
+        /* assume that we have a shardId with 7 digits */
+        largestShardId = 1000000;
+    }
+    PG_END_TRY();
+
+    SetUserIdAndSecContext(savedUserId, savedSecurityContext);
+
+    return largestShardId;
+}
@@ -297,7 +297,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry)
     ShardInterval **shardIntervalCache = cacheEntry->sortedShardIntervalArray;
     int shardCount = cacheEntry->shardIntervalArrayLength;
     FmgrInfo *compareFunction = cacheEntry->shardIntervalCompareFunction;
-    bool useBinarySearch = (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
+    bool useBinarySearch = (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
                             !cacheEntry->hasUniformHashDistribution);
     int shardIndex = INVALID_SHARD_INDEX;
@@ -111,12 +111,12 @@ worker_create_or_replace_object(PG_FUNCTION_ARGS)
         RenameStmt *renameStmt = CreateRenameStatement(&address, newName);
         const char *sqlRenameStmt = DeparseTreeNode((Node *) renameStmt);
         ProcessUtilityParseTree((Node *) renameStmt, sqlRenameStmt,
-                                PROCESS_UTILITY_TOPLEVEL,
+                                PROCESS_UTILITY_QUERY,
                                 NULL, None_Receiver, NULL);
     }

     /* apply create statement locally */
-    ProcessUtilityParseTree(parseTree, sqlStatement, PROCESS_UTILITY_TOPLEVEL, NULL,
+    ProcessUtilityParseTree(parseTree, sqlStatement, PROCESS_UTILITY_QUERY, NULL,
                             None_Receiver, NULL);

     /* type has been created */
@@ -28,13 +28,14 @@
 #include "commands/extension.h"
 #include "commands/sequence.h"
 #include "distributed/citus_ruleutils.h"
+#include "distributed/commands/multi_copy.h"
 #include "distributed/commands/utility_hook.h"
 #include "distributed/connection_management.h"
-#include "distributed/listutils.h"
 #include "distributed/coordinator_protocol.h"
+#include "distributed/intermediate_results.h"
+#include "distributed/listutils.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/multi_client_executor.h"
-#include "distributed/commands/multi_copy.h"
 #include "distributed/multi_logical_optimizer.h"
 #include "distributed/multi_partitioning_utils.h"
 #include "distributed/multi_server_executor.h"

@@ -45,6 +46,7 @@
 #include "distributed/worker_protocol.h"
 #include "distributed/version_compat.h"
 #include "nodes/makefuncs.h"
+#include "parser/parse_relation.h"
 #include "storage/lmgr.h"
 #include "tcop/tcopprot.h"
 #include "tcop/utility.h"

@@ -396,7 +398,7 @@ worker_apply_shard_ddl_command(PG_FUNCTION_ARGS)

     /* extend names in ddl command and apply extended command */
     RelayEventExtendNames(ddlCommandNode, schemaName, shardId);
-    ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_TOPLEVEL, NULL,
+    ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_QUERY, NULL,
                             None_Receiver, NULL);

     PG_RETURN_VOID();

@@ -428,7 +430,7 @@ worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS)
     RelayEventExtendNamesForInterShardCommands(ddlCommandNode, leftShardId,
                                                 leftShardSchemaName, rightShardId,
                                                 rightShardSchemaName);
-    ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_TOPLEVEL, NULL,
+    ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_QUERY, NULL,
                             None_Receiver, NULL);

     PG_RETURN_VOID();

@@ -461,7 +463,7 @@ worker_apply_sequence_command(PG_FUNCTION_ARGS)
     }

     /* run the CREATE SEQUENCE command */
-    ProcessUtilityParseTree(commandNode, commandString, PROCESS_UTILITY_TOPLEVEL, NULL,
+    ProcessUtilityParseTree(commandNode, commandString, PROCESS_UTILITY_QUERY, NULL,
                             None_Receiver, NULL);
     CommandCounterIncrement();

@@ -594,9 +596,6 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
     char *sourceSchemaName = NULL;
     char *sourceTableName = NULL;

-    Oid savedUserId = InvalidOid;
-    int savedSecurityContext = 0;
-
     CheckCitusVersion(ERROR);

     /* We extract schema names and table names from qualified names */

@@ -613,10 +612,13 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
     uint64 shardId = ExtractShardIdFromTableName(shardTableName, false);
     LockShardResource(shardId, AccessExclusiveLock);

-    /* copy remote table's data to this node */
+    /*
+     * Copy into intermediate results directory, which is automatically cleaned on
+     * error.
+     */
     StringInfo localFilePath = makeStringInfo();
-    appendStringInfo(localFilePath, "base/%s/%s" UINT64_FORMAT,
-                     PG_JOB_CACHE_DIR, TABLE_FILE_PREFIX, shardId);
+    appendStringInfo(localFilePath, "%s/worker_append_table_to_shard_" UINT64_FORMAT,
+                     CreateIntermediateResultsDirectory(), shardId);

     char *sourceQualifiedName = quote_qualified_identifier(sourceSchemaName,
                                                            sourceTableName);

@@ -641,7 +643,8 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
         appendStringInfo(sourceCopyCommand, COPY_OUT_COMMAND, sourceQualifiedName);
     }

-    bool received = ReceiveRegularFile(sourceNodeName, sourceNodePort, NULL,
+    char *userName = CurrentUserName();
+    bool received = ReceiveRegularFile(sourceNodeName, sourceNodePort, userName,
                                        sourceCopyCommand,
                                        localFilePath);
     if (!received)

@@ -664,17 +667,36 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
     /* make sure we are allowed to execute the COPY command */
     CheckCopyPermissions(localCopyCommand);

-    /* need superuser to copy from files */
-    GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
-    SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
-
-    ProcessUtilityParseTree((Node *) localCopyCommand, queryString->data,
-                            PROCESS_UTILITY_TOPLEVEL, NULL, None_Receiver, NULL);
-
-    SetUserIdAndSecContext(savedUserId, savedSecurityContext);
-
+    Relation shardRelation = table_openrv(localCopyCommand->relation, RowExclusiveLock);
+
+    /* mimic check from copy.c */
+    if (XactReadOnly && !shardRelation->rd_islocaltemp)
+    {
+        PreventCommandIfReadOnly("COPY FROM");
+    }
+
+    ParseState *parseState = make_parsestate(NULL);
+    (void) addRangeTableEntryForRelation(parseState, shardRelation,
+#if PG_VERSION_NUM >= PG_VERSION_12
+                                         RowExclusiveLock,
+#endif
+                                         NULL, false, false);
+
+    CopyState copyState = BeginCopyFrom(parseState,
+                                        shardRelation,
+                                        localCopyCommand->filename,
+                                        localCopyCommand->is_program,
+                                        NULL,
+                                        localCopyCommand->attlist,
+                                        localCopyCommand->options);
+    CopyFrom(copyState);
+    EndCopyFrom(copyState);
+
+    free_parsestate(parseState);
+
     /* finally delete the temporary file we created */
     CitusDeleteFile(localFilePath->data);
+    table_close(shardRelation, NoLock);

     PG_RETURN_VOID();
 }

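Taken together, the worker_append_table_to_shard hunks drop the temporary escalation to the extension owner: the fetched file now lands in the intermediate-results directory, the remote COPY TO runs as the current user, and the local COPY FROM goes through the lower-level copy API as the invoking user after CheckCopyPermissions, with the XactReadOnly test mirroring the one in PostgreSQL's copy.c. A condensed sketch of that COPY-from-file pattern (PostgreSQL 12/13-era APIs; assumes copyStatement is a CopyStmt that has already passed permission checks):

Relation rel = table_openrv(copyStatement->relation, RowExclusiveLock);

/* mimic the read-only check that DoCopy would otherwise perform */
if (XactReadOnly && !rel->rd_islocaltemp)
{
    PreventCommandIfReadOnly("COPY FROM");
}

ParseState *pstate = make_parsestate(NULL);
(void) addRangeTableEntryForRelation(pstate, rel, RowExclusiveLock, NULL, false, false);

CopyState cstate = BeginCopyFrom(pstate, rel,
                                 copyStatement->filename,
                                 copyStatement->is_program,
                                 NULL, /* no data-source callback, read from the file */
                                 copyStatement->attlist,
                                 copyStatement->options);
uint64 processedRowCount = CopyFrom(cstate);
EndCopyFrom(cstate);

free_parsestate(pstate);
table_close(rel, NoLock);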
@@ -782,7 +804,7 @@ AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequenceName,

         /* since the command is an AlterSeqStmt, a dummy command string works fine */
         ProcessUtilityParseTree((Node *) alterSequenceStatement, dummyString,
-                                PROCESS_UTILITY_TOPLEVEL, NULL, None_Receiver, NULL);
+                                PROCESS_UTILITY_QUERY, NULL, None_Receiver, NULL);
     }
 }

@@ -37,6 +37,7 @@

 #include "executor/spi.h"
 #include "nodes/makefuncs.h"
+#include "parser/parse_relation.h"
 #include "parser/parse_type.h"
 #include "storage/lmgr.h"
 #include "utils/acl.h"

@@ -183,8 +184,6 @@ worker_merge_files_into_table(PG_FUNCTION_ARGS)
     StringInfo jobSchemaName = JobSchemaName(jobId);
     StringInfo taskTableName = TaskTableName(taskId);
     StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId);
-    Oid savedUserId = InvalidOid;
-    int savedSecurityContext = 0;
     Oid userId = GetUserId();

     /* we should have the same number of column names and types */

@@ -233,14 +232,9 @@ worker_merge_files_into_table(PG_FUNCTION_ARGS)

     CreateTaskTable(jobSchemaName, taskTableName, columnNameList, columnTypeList);

-    /* need superuser to copy from files */
-    GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
-    SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
-
     CopyTaskFilesFromDirectory(jobSchemaName, taskTableName, taskDirectoryName,
                                userId);

-    SetUserIdAndSecContext(savedUserId, savedSecurityContext);
     PG_RETURN_VOID();
 }

@@ -569,8 +563,8 @@ CopyTaskFilesFromDirectory(StringInfo schemaName, StringInfo relationName,
         appendStringInfo(fullFilename, "%s/%s", directoryName, baseFilename);

         /* build relation object and copy statement */
-        RangeVar *relation = makeRangeVar(schemaName->data, relationName->data, -1);
-        CopyStmt *copyStatement = CopyStatement(relation, fullFilename->data);
+        RangeVar *rangeVar = makeRangeVar(schemaName->data, relationName->data, -1);
+        CopyStmt *copyStatement = CopyStatement(rangeVar, fullFilename->data);
         if (BinaryWorkerCopyFormat)
         {
             DefElem *copyOption = makeDefElem("format", (Node *) makeString("binary"),

@@ -579,12 +573,28 @@
         }

         {
-            ParseState *pstate = make_parsestate(NULL);
-            pstate->p_sourcetext = queryString;
+            ParseState *parseState = make_parsestate(NULL);
+            parseState->p_sourcetext = queryString;

-            DoCopy(pstate, copyStatement, -1, -1, &copiedRowCount);
-
-            free_parsestate(pstate);
+            Relation relation = table_openrv(rangeVar, RowExclusiveLock);
+            (void) addRangeTableEntryForRelation(parseState, relation,
+#if PG_VERSION_NUM >= PG_VERSION_12
+                                                 RowExclusiveLock,
+#endif
+                                                 NULL, false, false);
+
+            CopyState copyState = BeginCopyFrom(parseState,
+                                                relation,
+                                                copyStatement->filename,
+                                                copyStatement->is_program,
+                                                NULL,
+                                                copyStatement->attlist,
+                                                copyStatement->options);
+            copiedRowCount = CopyFrom(copyState);
+            EndCopyFrom(copyState);
+
+            free_parsestate(parseState);
+            table_close(relation, NoLock);
         }

         copiedRowTotal += copiedRowCount;

@@ -24,6 +24,14 @@
 /* controlled via GUC, should be accessed via EnableLocalReferenceForeignKeys() */
 extern bool EnableLocalReferenceForeignKeys;

+extern void SwitchToSequentialAndLocalExecutionIfRelationNameTooLong(Oid relationId,
+                                                                     char *
+                                                                     finalRelationName);
+extern void SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(Oid
+                                                                      parentRelationId,
+                                                                      Oid
+                                                                      partitionRelationId);
+

 /*
  * DistributeObjectOps specifies handlers for node/object type pairs.

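The two new declarations concern shard relation names that become too long once Citus appends a shard ID suffix: PostgreSQL silently truncates identifiers longer than NAMEDATALEN - 1 (63) bytes, and the declared helpers switch the current command to sequential and local execution when the final shard name would hit that limit. A minimal illustration of the underlying length check; the helper name is hypothetical, only NAMEDATALEN is PostgreSQL's:

#include "postgres.h"    /* NAMEDATALEN, 64 by default */

#include <string.h>

/* hypothetical helper: would PostgreSQL truncate this identifier? */
static bool
RelationNameIsTooLong(const char *finalRelationName)
{
    return strlen(finalRelationName) >= NAMEDATALEN;
}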
@@ -200,8 +200,12 @@ extern int NodeConnectionTimeout;
 /* maximum number of connections to cache per worker per session */
 extern int MaxCachedConnectionsPerWorker;

+/* maximum lifetime of connections in miliseconds */
+extern int MaxCachedConnectionLifetime;
+
 /* parameters used for outbound connections */
 extern char *NodeConninfo;
+extern char *LocalHostName;

 /* the hash tables are externally accessiable */
 extern HTAB *ConnectionHash;

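MaxCachedConnectionLifetime and LocalHostName are new globals in the connection-management header; in Citus such globals are typically backed by GUCs registered when the extension library loads. A sketch of how an integer setting of this kind is usually registered through PostgreSQL's DefineCustomIntVariable; the setting name, defaults, and bounds below are assumptions, not taken from this diff:

/* inside _PG_init(); requires utils/guc.h and limits.h; names and defaults assumed */
DefineCustomIntVariable(
    "citus.max_cached_connection_lifetime",
    gettext_noop("Sets the maximum lifetime of cached connections."),
    NULL,
    &MaxCachedConnectionLifetime,
    10 * 60 * 1000,     /* assumed default: 10 minutes */
    -1,                 /* assumed minimum; -1 as "never expire" is an assumption */
    INT_MAX,
    PGC_USERSET,
    GUC_UNIT_MS,
    NULL, NULL, NULL);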
@@ -258,4 +262,5 @@ extern bool IsCitusInitiatedRemoteBackend(void);
 extern double MillisecondsPassedSince(instr_time moment);
 extern long MillisecondsToTimeout(instr_time start, long msAfterStart);

+extern void WarmUpConnParamsHash(void);
 #endif /* CONNECTION_MANAGMENT_H */

@@ -28,7 +28,8 @@ extern void SetTaskQueryString(Task *task, char *queryString);
 extern void SetTaskQueryStringList(Task *task, List *queryStringList);
 extern char * TaskQueryString(Task *task);
 extern char * TaskQueryStringAtIndex(Task *task, int index);
-extern bool UpdateRelationsToLocalShardTables(Node *node, List *relationShardList);
 extern int GetTaskQueryType(Task *task);
+extern void AddInsertAliasIfNeeded(Query *query);
+

 #endif /* DEPARSE_SHARD_QUERY_H */

|
||||||
|
|
||||||
/* list of RootPlanParams for all outer nodes */
|
/* list of RootPlanParams for all outer nodes */
|
||||||
List *outerPlanParamsList;
|
List *outerPlanParamsList;
|
||||||
|
|
||||||
|
/* list of translated vars, this is copied from postgres since it gets deleted on postgres*/
|
||||||
|
List *translatedVars;
|
||||||
} RelationRestriction;
|
} RelationRestriction;
|
||||||
|
|
||||||
typedef struct JoinRestrictionContext
|
typedef struct JoinRestrictionContext
|
||||||
|
@@ -219,9 +222,9 @@ extern PlannedStmt * distributed_planner(Query *parse,
 #define LOCAL_TABLE_SUBQUERY_CTE_HINT \
     "Use CTE's or subqueries to select from local tables and use them in joins"

-
 extern List * ExtractRangeTableEntryList(Query *query);
 extern bool NeedsDistributedPlanning(Query *query);
+extern List * TranslatedVarsForRteIdentity(int rteIdentity);
 extern struct DistributedPlan * GetDistributedPlan(CustomScan *node);
 extern void multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
                                             Index restrictionIndex, RangeTblEntry *rte);

@@ -238,6 +241,7 @@ extern Node * ResolveExternalParams(Node *inputNode, ParamListInfo boundParams);
 extern bool IsMultiTaskPlan(struct DistributedPlan *distributedPlan);
 extern RangeTblEntry * RemoteScanRangeTableEntry(List *columnNameList);
 extern int GetRTEIdentity(RangeTblEntry *rte);
+extern bool GetOriginalInh(RangeTblEntry *rte);
 extern LOCKMODE GetQueryLockMode(Query *query);
 extern int32 BlessRecordExpression(Expr *expr);
 extern void DissuadePlannerFromUsingPlan(PlannedStmt *plan);

@@ -19,6 +19,9 @@


 /* Remaining metadata utility functions */
+extern Var * FindColumnWithNameOnTargetRelation(Oid sourceRelationId,
+                                                char *sourceColumnName,
+                                                Oid targetRelationId);
 extern Var * BuildDistributionKeyFromColumnName(Relation distributedRelation,
                                                 char *columnName);
 extern char * ColumnToColumnName(Oid relationId, char *columnNodeString);

Some files were not shown because too many files have changed in this diff.