mirror of https://github.com/citusdata/citus.git
Compare commits
70 Commits
ccc042a9d4
13748d1eb7
5c157a5357
c6c31e0f1f
b504b749a6
683279cc36
f2ec1563c2
313a489856
a2e9a59007
0419d340b6
f7a0e3b64d
efa572009f
7beea1417c
eb699ecec6
006df8fef3
4ce6c9d8b9
ea4549a4c0
453d363ec3
103887024d
e63435def5
5c6df97e92
feb37dab62
d693bc1b0c
535b0804be
2452a899bd
fc4e64ed9a
8ed792cdb4
6f767ac04b
43421d4043
a573e0df95
56c8389495
e91be43de1
913978235a
2abbc8fa32
d86e4d2587
909711548b
0103fed310
9e5c84514a
2eea8cb741
c45589bbcd
bab0f268d3
20782c5ff5
e4e0c65203
0a1845a05c
5dd61c8319
2ad9ca6726
7969849598
1d0c3f36fc
d0e4bc5d22
2f063d0316
1216e079fa
e0736d3da7
b451c31c4c
e2d49c6122
568f057ba0
db1a0835f3
f2035b9f33
00e0148a9c
8d979b4752
1bba516b56
ca44697723
975c0a8405
fd89760a29
a96ff3cd6c
741ca1d33e
946b7a1a49
7188574c62
d4004a495c
bcc675cf84
6522965963
@@ -40,6 +40,9 @@ jobs:
      - run:
          name: 'Check if changed'
          command: git diff --exit-code
      - run:
          name: 'Check for banned C API usage'
          command: ci/banned.h.sh
  check-sql-snapshots:
    docker:
      - image: 'citus/extbuilder:latest'
@@ -11,6 +11,8 @@ coverage:
    - "src/backend/distributed/utils/citus_outfuncs.c"
    - "src/backend/distributed/deparser/ruleutils_*.c"
    - "src/include/distributed/citus_nodes.h"
    - "src/backend/distributed/safeclib"
    - "vendor"

status:
  project:
@@ -26,10 +26,12 @@ configure -whitespace

# except these exceptions...
src/backend/distributed/utils/citus_outfuncs.c -citus-style
src/backend/distributed/utils/pg11_snprintf.c -citus-style
src/backend/distributed/deparser/ruleutils_10.c -citus-style
src/backend/distributed/deparser/ruleutils_11.c -citus-style
src/backend/distributed/deparser/ruleutils_12.c -citus-style
src/include/distributed/citus_nodes.h -citus-style
/vendor/** -citus-style

# Hide diff on github by default for copied udfs
src/backend/distributed/sql/udfs/*/[123456789]*.sql linguist-generated=true
62 CHANGELOG.md
@@ -1,3 +1,65 @@
### citus v9.2.7 (November 3, 2021) ###

* Fixes `ALTER TABLE IF EXISTS SET SCHEMA` with non-existing table bug

* Fixes `CREATE INDEX CONCURRENTLY` with no index name on a postgres table bug

* Fixes a bug that could cause crashes with certain compile flags

* Fixes a crash because of overflow in partition id with certain compile flags

* Fixes a memory leak in subtransaction memory handling

* Fixes deparsing for queries with anonymous column references

### citus v9.2.6 (June 22, 2020) ###

* Fixes a version bump issue in 9.2.5

### citus v9.2.5 (June 17, 2020) ###

* Adds support to partially push down tdigest aggregates

* Fixes an issue with distributing tables having generated cols not at the end

### citus v9.2.4 (March 30, 2020) ###

* Fixes a release problem in 9.2.3

### citus v9.2.3 (March 25, 2020) ###

* Do not use C functions that have been banned by Microsoft

* Fixes a bug that causes wrong results with complex outer joins

* Fixes issues found using static analysis

* Fixes left join shard pruning in pushdown planner

* Fixes possibility for segmentation fault in internal aggregate functions

* Fixes possible segfault when non pushdownable aggregates are used in `HAVING`

* Improves correctness of planning subqueries in `HAVING`

* Prevents using old connections for security if `citus.node_conninfo` changed

* Uses Microsoft approved cipher string for default TLS setup

### citus v9.2.2 (March 5, 2020) ###

* Fixes a bug that caused some prepared stmts with function calls to fail

* Fixes a bug that caused some prepared stmts with composite types to fail

* Fixes a bug that caused missing subplan results in workers

* Improves performance of re-partition joins

### citus v9.2.1 (February 14, 2020) ###

* Fixes a bug that could cause crashes if distribution key is NULL

### citus v9.2.0 (February 10, 2020) ###

* Adds support for `INSERT...SELECT` queries with re-partitioning
4 Makefile
@@ -26,7 +26,9 @@ install-headers: extension
	$(INSTALL_DATA) $(citus_abs_srcdir)/src/include/distributed/*.h '$(DESTDIR)$(includedir_server)/distributed/'
clean-extension:
	$(MAKE) -C src/backend/distributed/ clean
.PHONY: extension install-extension clean-extension
clean-full:
	$(MAKE) -C src/backend/distributed/ clean-full
.PHONY: extension install-extension clean-extension clean-full
# Add to generic targets
install: install-extension install-headers
clean: clean-extension
@@ -0,0 +1,54 @@
#!/bin/sh

# Checks for the APIs that are banned by Microsoft. Since we compile for Linux
# we use the replacements from https://github.com/intel/safestringlib
# Not all replacement functions are available in safestringlib. If a
# replacement doesn't exist and you cannot rewrite the code to not use the
# banned API, then you can add a comment containing "IGNORE-BANNED" to the
# line where the error is and this check will ignore that match.
#
# The replacement functions that you should use are listed here:
# https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10082#guide

set -eu

files=$(find src -iname '*.[ch]' | git check-attr --stdin citus-style | grep -v ': unset$' | sed 's/: citus-style: set$//')

# grep is allowed to fail, that means no banned matches are found
set +e

# Required banned from banned.h. These functions are not allowed to be used at
# all.
# shellcheck disable=SC2086
grep -E '\b(strcpy|strcpyA|strcpyW|wcscpy|_tcscpy|_mbscpy|StrCpy|StrCpyA|StrCpyW|lstrcpy|lstrcpyA|lstrcpyW|_tccpy|_mbccpy|_ftcscpy|strcat|strcatA|strcatW|wcscat|_tcscat|_mbscat|StrCat|StrCatA|StrCatW|lstrcat|lstrcatA|lstrcatW|StrCatBuff|StrCatBuffA|StrCatBuffW|StrCatChainW|_tccat|_mbccat|_ftcscat|sprintfW|sprintfA|wsprintf|wsprintfW|wsprintfA|sprintf|swprintf|_stprintf|wvsprintf|wvsprintfA|wvsprintfW|vsprintf|_vstprintf|vswprintf|strncpy|wcsncpy|_tcsncpy|_mbsncpy|_mbsnbcpy|StrCpyN|StrCpyNA|StrCpyNW|StrNCpy|strcpynA|StrNCpyA|StrNCpyW|lstrcpyn|lstrcpynA|lstrcpynW|strncat|wcsncat|_tcsncat|_mbsncat|_mbsnbcat|StrCatN|StrCatNA|StrCatNW|StrNCat|StrNCatA|StrNCatW|lstrncat|lstrcatnA|lstrcatnW|lstrcatn|gets|_getts|_gettws|IsBadWritePtr|IsBadHugeWritePtr|IsBadReadPtr|IsBadHugeReadPtr|IsBadCodePtr|IsBadStringPtr|memcpy|RtlCopyMemory|CopyMemory|wmemcpy|lstrlen)\(' $files \
	| grep -v "IGNORE-BANNED" \
	&& echo "ERROR: Required banned API usage detected" && exit 1

# Required banned from table on liquid. These functions are not allowed to be
# used at all.
# shellcheck disable=SC2086
grep -E '\b(strcat|strcpy|strerror|strncat|strncpy|strtok|wcscat|wcscpy|wcsncat|wcsncpy|wcstok|fprintf|fwprintf|printf|snprintf|sprintf|swprintf|vfprintf|vprintf|vsnprintf|vsprintf|vswprintf|vwprintf|wprintf|fscanf|fwscanf|gets|scanf|sscanf|swscanf|vfscanf|vfwscanf|vscanf|vsscanf|vswscanf|vwscanf|wscanf|asctime|atof|atoi|atol|atoll|bsearch|ctime|fopen|freopen|getenv|gmtime|localtime|mbsrtowcs|mbstowcs|memcpy|memmove|qsort|rewind|setbuf|wmemcpy|wmemmove)\(' $files \
	| grep -v "IGNORE-BANNED" \
	&& echo "ERROR: Required banned API usage from table detected" && exit 1

# Recommended banned from banned.h. If you can change the code not to use these
# that would be great. You can use IGNORE-BANNED if you need to use it anyway.
# You can also remove it from the regex, if you want to mark the API as allowed
# throughout the codebase (to not have to add IGNORE-BANNED everywhere). In
# that case note in this comment that you did so.
# shellcheck disable=SC2086
grep -E '\b(wnsprintf|wnsprintfA|wnsprintfW|_snwprintf|_snprintf|_sntprintf|_vsnprintf|vsnprintf|_vsnwprintf|_vsntprintf|wvnsprintf|wvnsprintfA|wvnsprintfW|strtok|_tcstok|wcstok|_mbstok|makepath|_tmakepath| _makepath|_wmakepath|_splitpath|_tsplitpath|_wsplitpath|scanf|wscanf|_tscanf|sscanf|swscanf|_stscanf|snscanf|snwscanf|_sntscanf|_itoa|_itow|_i64toa|_i64tow|_ui64toa|_ui64tot|_ui64tow|_ultoa|_ultot|_ultow|CharToOem|CharToOemA|CharToOemW|OemToChar|OemToCharA|OemToCharW|CharToOemBuffA|CharToOemBuffW|alloca|_alloca|ChangeWindowMessageFilter)\(' $files \
	| grep -v "IGNORE-BANNED" \
	&& echo "ERROR: Recommended banned API usage detected" && exit 1

# Recommended banned from table on liquid. If you can change the code not to
# use these that would be great. You can use IGNORE-BANNED if you need to use
# it anyway.
# You can also remove it from the regex, if you want to mark the API as allowed
# throughout the codebase (to not have to add IGNORE-BANNED everywhere). In
# that case note in this comment that you did so.
# Banned APIs ignored throughout the codebase:
# - strlen
# shellcheck disable=SC2086
grep -E '\b(alloca|getwd|mktemp|tmpnam|wcrtomb|wcrtombs|wcslen|wcsrtombs|wcstombs|wctomb|class_addMethod|class_replaceMethod)\(' $files \
	| grep -v "IGNORE-BANNED" \
	&& echo "ERROR: Recommended banned API usage detected" && exit 1
exit 0
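The IGNORE-BANNED escape hatch above is purely textual: each grep pipeline drops matching lines that also contain that string, so the marker has to sit on the same source line as the banned call. A minimal hypothetical C fragment showing how a deliberately retained banned call would be annotated (the function and buffer names are illustrative, not taken from the Citus tree):

#include <string.h>

/* fixed-size copy that has been audited by hand; memcpy is on the banned
 * list, so the marker comment must share the line with the call itself */
static void
CopyAuditedBuffer(char *dest, const char *src, size_t size)
{
	memcpy(dest, src, size); /* IGNORE-BANNED bounds checked by all callers */
}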
@@ -4,6 +4,8 @@ for f in $(git ls-tree -r HEAD --name-only); do
	if [ "$f" = "${f%.out}" ] &&
	   [ "$f" = "${f%.data}" ] &&
	   [ "$f" = "${f%.png}" ] &&
	   [ -f "$f" ] &&
	   [ "$(echo "$f" | cut -d / -f1)" != "vendor" ] &&
	   [ "$(dirname "$f")" != "src/test/regress/output" ]
	then
		# Trim trailing whitespace
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for Citus 9.2devel.
# Generated by GNU Autoconf 2.69 for Citus 9.2.7.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.

@@ -579,8 +579,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='Citus'
PACKAGE_TARNAME='citus'
PACKAGE_VERSION='9.2devel'
PACKAGE_STRING='Citus 9.2devel'
PACKAGE_VERSION='9.2.7'
PACKAGE_STRING='Citus 9.2.7'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

@@ -664,6 +664,7 @@ infodir
docdir
oldincludedir
includedir
runstatedir
localstatedir
sharedstatedir
sysconfdir

@@ -740,6 +741,7 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'

@@ -992,6 +994,15 @@ do
  | -silent | --silent | --silen | --sile | --sil)
    silent=yes ;;

  -runstatedir | --runstatedir | --runstatedi | --runstated \
  | --runstate | --runstat | --runsta | --runst | --runs \
  | --run | --ru | --r)
    ac_prev=runstatedir ;;
  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
  | --run=* | --ru=* | --r=*)
    runstatedir=$ac_optarg ;;

  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
    ac_prev=sbindir ;;
  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \

@@ -1129,7 +1140,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
		datadir sysconfdir sharedstatedir localstatedir includedir \
		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
		libdir localedir mandir
		libdir localedir mandir runstatedir
do
  eval ac_val=\$$ac_var
  # Remove trailing slashes.

@@ -1242,7 +1253,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
\`configure' configures Citus 9.2devel to adapt to many kinds of systems.
\`configure' configures Citus 9.2.7 to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1282,6 +1293,7 @@ Fine tuning of the installation directories:
  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
  --runstatedir=DIR       modifiable per-process data [LOCALSTATEDIR/run]
  --libdir=DIR            object code libraries [EPREFIX/lib]
  --includedir=DIR        C header files [PREFIX/include]
  --oldincludedir=DIR     C header files for non-gcc [/usr/include]

@@ -1303,7 +1315,7 @@ fi

if test -n "$ac_init_help"; then
  case $ac_init_help in
    short | recursive ) echo "Configuration of Citus 9.2devel:";;
    short | recursive ) echo "Configuration of Citus 9.2.7:";;
  esac
  cat <<\_ACEOF

@@ -1403,7 +1415,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
  cat <<\_ACEOF
Citus configure 9.2devel
Citus configure 9.2.7
generated by GNU Autoconf 2.69

Copyright (C) 2012 Free Software Foundation, Inc.

@@ -1886,7 +1898,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by Citus $as_me 9.2devel, which was
It was created by Citus $as_me 9.2.7, which was
generated by GNU Autoconf 2.69. Invocation command line was

  $ $0 $@

@@ -5055,7 +5067,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by Citus $as_me 9.2devel, which was
This file was extended by Citus $as_me 9.2.7, which was
generated by GNU Autoconf 2.69. Invocation command line was

  CONFIG_FILES = $CONFIG_FILES

@@ -5117,7 +5129,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
Citus config.status 9.2devel
Citus config.status 9.2.7
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -5,7 +5,7 @@
# everyone needing autoconf installed, the resulting files are checked
# into the SCM.

AC_INIT([Citus], [9.2devel])
AC_INIT([Citus], [9.2.7])
AC_COPYRIGHT([Copyright (c) Citus Data, Inc.])

# we'll need sed and awk for some of the version commands
@@ -2,6 +2,10 @@

citus_subdir = src/backend/distributed
citus_top_builddir = ../../..
safestringlib_srcdir = $(citus_abs_top_srcdir)/vendor/safestringlib
safestringlib_builddir = $(citus_top_builddir)/vendor/safestringlib/build
safestringlib_a = $(safestringlib_builddir)/libsafestring_static.a
safestringlib_sources = $(wildcard $(safestringlib_srcdir)/safeclib/*)

MODULE_big = citus
EXTENSION = citus

@@ -12,7 +16,11 @@ generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_fi
DATA_built = $(generated_sql_files)

# directories with source files
SUBDIRS = . commands connection ddl deparser executor master metadata planner progress relay test transaction utils worker
SUBDIRS = . commands connection ddl deparser executor master metadata planner progress relay safeclib test transaction utils worker

# Symlinks are not copied over to the build directory if a separate build
# directory is used during configure (such as on CI)
ENSURE_SUBDIRS_EXIST := $(shell mkdir -p $(SUBDIRS))

# That patsubst rule searches all directories listed in SUBDIRS for .c
# files, and adds the corresponding .o files to OBJS

@@ -39,7 +47,7 @@ utils/citus_version.o: $(CITUS_VERSION_INVALIDATE)

SHLIB_LINK += $(filter -lssl -lcrypto -lssleay32 -leay32, $(LIBS))

override CPPFLAGS += -I$(libpq_srcdir)
override CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include

SQL_DEPDIR=.deps/sql
SQL_BUILDDIR=build/sql

@@ -53,7 +61,7 @@ ifneq (,$(SQL_Po_files))
include $(SQL_Po_files)
endif

.PHONY: check-sql-snapshots
.PHONY: check-sql-snapshots clean-full

check-sql-snapshots:
	bash -c '\

@@ -67,3 +75,7 @@ cleanup-before-install:
	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus*

install: cleanup-before-install

clean-full:
	make clean
	rm -rf $(safestringlib_builddir)
@@ -1,6 +1,6 @@
# Citus extension
comment = 'Citus distributed database'
default_version = '9.2-2'
default_version = '9.2-4'
module_pathname = '$libdir/citus'
relocatable = false
schema = pg_catalog
@@ -13,6 +13,7 @@
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/pg_collation.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/commands/utility_hook.h"
#include "distributed/commands.h"
#include "distributed/deparser.h"

@@ -529,16 +530,17 @@ GenerateBackupNameForCollationCollision(const ObjectAddress *address)

while (true)
{
int suffixLength = snprintf(suffix, NAMEDATALEN - 1, "(citus_backup_%d)",
count);
int suffixLength = SafeSnprintf(suffix, NAMEDATALEN - 1, "(citus_backup_%d)",
count);

/* trim the base name at the end to leave space for the suffix and trailing \0 */
baseLength = Min(baseLength, NAMEDATALEN - suffixLength - 1);

/* clear newName before copying the potentially trimmed baseName and suffix */
memset(newName, 0, NAMEDATALEN);
strncpy(newName, baseName, baseLength);
strncpy(newName + baseLength, suffix, suffixLength);
strncpy_s(newName, NAMEDATALEN, baseName, baseLength);
strncpy_s(newName + baseLength, NAMEDATALEN - baseLength, suffix,
suffixLength);

List *newCollationName = list_make2(namespace, makeString(newName));
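The hunk above (and the matching ones in function.c and type.c further down) swaps libc calls for safestringlib's bounds-checked SafeSnprintf/strncpy_s while keeping the same truncation arithmetic. A rough standalone sketch of that arithmetic using only standard C, as an illustration rather than the real implementation (the helper name and the NAMEDATALEN value here are assumptions; PostgreSQL's actual NAMEDATALEN is 64 by default):

#include <stdio.h>
#include <string.h>

#define NAMEDATALEN 64 /* PostgreSQL's default identifier length */

/* build "<possibly truncated base>(citus_backup_N)" in a fixed buffer */
static void
BuildBackupName(char newName[NAMEDATALEN], const char *baseName, int count)
{
	char suffix[NAMEDATALEN] = { 0 };
	int suffixLength = snprintf(suffix, NAMEDATALEN - 1,
								"(citus_backup_%d)", count);

	/* trim the base name so base + suffix + trailing \0 fit the buffer */
	size_t baseLength = strlen(baseName);
	size_t maxBase = (size_t) (NAMEDATALEN - suffixLength - 1);
	if (baseLength > maxBase)
	{
		baseLength = maxBase;
	}

	snprintf(newName, NAMEDATALEN, "%.*s%s", (int) baseLength, baseName, suffix);
}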
@@ -166,6 +166,7 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
char *distributionColumnName = text_to_cstring(distributionColumnText);
Var *distributionColumn = BuildDistributionKeyFromColumnName(relation,
distributionColumnName);
Assert(distributionColumn != NULL);
char distributionMethod = LookupDistributionMethod(distributionMethodOid);

CreateDistributedTable(relationId, distributionColumn, distributionMethod,

@@ -232,6 +233,7 @@ create_distributed_table(PG_FUNCTION_ARGS)
char *distributionColumnName = text_to_cstring(distributionColumnText);
Var *distributionColumn = BuildDistributionKeyFromColumnName(relation,
distributionColumnName);
Assert(distributionColumn != NULL);
char distributionMethod = LookupDistributionMethod(distributionMethodOid);

char *colocateWithTableName = text_to_cstring(colocateWithTableNameText);

@@ -795,7 +797,7 @@ EnsureTableCanBeColocatedWith(Oid relationId, char replicationModel,
DistTableCacheEntry *sourceTableEntry = DistributedTableCacheEntry(sourceRelationId);
char sourceDistributionMethod = sourceTableEntry->partitionMethod;
char sourceReplicationModel = sourceTableEntry->replicationModel;
Var *sourceDistributionColumn = DistPartitionKey(sourceRelationId);
Var *sourceDistributionColumn = ForceDistPartitionKey(sourceRelationId);

if (sourceDistributionMethod != DISTRIBUTE_BY_HASH)
{
@@ -30,6 +30,7 @@
#include "catalog/pg_type.h"
#include "commands/extension.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/colocation_utils.h"
#include "distributed/commands.h"
#include "distributed/commands/utility_hook.h"

@@ -411,7 +412,6 @@ EnsureFunctionCanBeColocatedWithTable(Oid functionOid, Oid distributionColumnTyp
DistTableCacheEntry *sourceTableEntry = DistributedTableCacheEntry(sourceRelationId);
char sourceDistributionMethod = sourceTableEntry->partitionMethod;
char sourceReplicationModel = sourceTableEntry->replicationModel;
Var *sourceDistributionColumn = DistPartitionKey(sourceRelationId);

if (sourceDistributionMethod != DISTRIBUTE_BY_HASH)
{

@@ -443,6 +443,7 @@ EnsureFunctionCanBeColocatedWithTable(Oid functionOid, Oid distributionColumnTyp
* If the types are the same, we're good. If not, we still check if there
* is any coercion path between the types.
*/
Var *sourceDistributionColumn = ForceDistPartitionKey(sourceRelationId);
Oid sourceDistributionColumnType = sourceDistributionColumn->vartype;
if (sourceDistributionColumnType != distributionColumnType)
{

@@ -1715,16 +1716,17 @@ GenerateBackupNameForProcCollision(const ObjectAddress *address)

while (true)
{
int suffixLength = snprintf(suffix, NAMEDATALEN - 1, "(citus_backup_%d)",
count);
int suffixLength = SafeSnprintf(suffix, NAMEDATALEN - 1, "(citus_backup_%d)",
count);

/* trim the base name at the end to leave space for the suffix and trailing \0 */
baseLength = Min(baseLength, NAMEDATALEN - suffixLength - 1);

/* clear newName before copying the potentially trimmed baseName and suffix */
memset(newName, 0, NAMEDATALEN);
strncpy(newName, baseName, baseLength);
strncpy(newName + baseLength, suffix, suffixLength);
strncpy_s(newName, NAMEDATALEN, baseName, baseLength);
strncpy_s(newName + baseLength, NAMEDATALEN - baseLength, suffix,
suffixLength);

List *newProcName = list_make2(namespace, makeString(newName));
@@ -410,6 +410,16 @@ PostprocessIndexStmt(Node *node, const char *queryString)
return NIL;
}

/*
* We make sure schema name is not null in the PreprocessIndexStmt
*/
Oid schemaId = get_namespace_oid(indexStmt->relation->schemaname, true);
Oid relationId = get_relname_relid(indexStmt->relation->relname, schemaId);
if (!IsDistributedTable(relationId))
{
return NIL;
}

/* commit the current transaction and start anew */
CommitTransactionCommand();
StartTransactionCommand();

@@ -417,7 +427,7 @@ PostprocessIndexStmt(Node *node, const char *queryString)
/* get the affected relation and index */
Relation relation = heap_openrv(indexStmt->relation, ShareUpdateExclusiveLock);
Oid indexRelationId = get_relname_relid(indexStmt->idxname,
RelationGetNamespace(relation));
schemaId);
Relation indexRelation = index_open(indexRelationId, RowExclusiveLock);

/* close relations but retain locks */

@@ -793,7 +803,6 @@ ErrorIfUnsupportedIndexStmt(IndexStmt *createIndexStatement)
/* caller uses ShareLock for non-concurrent indexes, use the same lock here */
LOCKMODE lockMode = ShareLock;
Oid relationId = RangeVarGetRelid(relation, lockMode, missingOk);
Var *partitionKey = DistPartitionKey(relationId);
char partitionMethod = PartitionMethod(relationId);
ListCell *indexParameterCell = NULL;
bool indexContainsPartitionColumn = false;

@@ -814,6 +823,7 @@ ErrorIfUnsupportedIndexStmt(IndexStmt *createIndexStatement)
"is currently unsupported")));
}

Var *partitionKey = ForceDistPartitionKey(relationId);
List *indexParameterList = createIndexStatement->indexParams;
foreach(indexParameterCell, indexParameterList)
{
@@ -65,6 +65,7 @@
#include "catalog/pg_type.h"
#include "commands/copy.h"
#include "commands/defrem.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/commands/multi_copy.h"
#include "distributed/commands/utility_hook.h"
#include "distributed/intermediate_results.h"

@@ -426,9 +427,8 @@ CopyToExistingShards(CopyStmt *copyStatement, char *completionTag)
* There is no need to deep copy everything. We will just deep copy the fields
* we will change.
*/
memcpy(copiedDistributedRelation, distributedRelation, sizeof(RelationData));
memcpy(copiedDistributedRelationTuple, distributedRelation->rd_rel,
CLASS_TUPLE_SIZE);
*copiedDistributedRelation = *distributedRelation;
*copiedDistributedRelationTuple = *distributedRelation->rd_rel;

copiedDistributedRelation->rd_rel = copiedDistributedRelationTuple;
copiedDistributedRelation->rd_att = CreateTupleDescCopyConstr(tupleDescriptor);

@@ -505,8 +505,8 @@ CopyToExistingShards(CopyStmt *copyStatement, char *completionTag)

if (completionTag != NULL)
{
snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
"COPY " UINT64_FORMAT, processedRowCount);
SafeSnprintf(completionTag, COMPLETION_TAG_BUFSIZE,
"COPY " UINT64_FORMAT, processedRowCount);
}
}

@@ -680,8 +680,8 @@ CopyToNewShards(CopyStmt *copyStatement, char *completionTag, Oid relationId)

if (completionTag != NULL)
{
snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
"COPY " UINT64_FORMAT, processedRowCount);
SafeSnprintf(completionTag, COMPLETION_TAG_BUFSIZE,
"COPY " UINT64_FORMAT, processedRowCount);
}
}

@@ -1208,8 +1208,8 @@ TypeForColumnName(Oid relationId, TupleDesc tupleDescriptor, char *columnName)


/*
* Walks a TupleDesc and returns an array of the types of each attribute. Will return
* InvalidOid in the place of dropped attributes.
* Walks a TupleDesc and returns an array of the types of each attribute.
* Returns InvalidOid in the place of dropped or generated attributes.
*/
static Oid *
TypeArrayFromTupleDescriptor(TupleDesc tupleDescriptor)

@@ -1220,7 +1220,11 @@ TypeArrayFromTupleDescriptor(TupleDesc tupleDescriptor)
for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
{
Form_pg_attribute attr = TupleDescAttr(tupleDescriptor, columnIndex);
if (attr->attisdropped)
if (attr->attisdropped
#if PG_VERSION_NUM >= 120000
|| attr->attgenerated == ATTRIBUTE_GENERATED_STORED
#endif
)
{
typeArray[columnIndex] = InvalidOid;
}

@@ -1256,7 +1260,7 @@ ColumnCoercionPaths(TupleDesc destTupleDescriptor, TupleDesc inputTupleDescripto

if (inputTupleType == InvalidOid)
{
/* this was a dropped column and will not be in the incoming tuples */
/* TypeArrayFromTupleDescriptor decided to skip this column */
continue;
}

@@ -1296,9 +1300,9 @@ TypeOutputFunctions(uint32 columnCount, Oid *typeIdArray, bool binaryFormat)
bool typeVariableLength = false;
Oid outputFunctionId = InvalidOid;

/* If there are any dropped columns it'll show up as a NULL */
if (columnTypeId == InvalidOid)
{
/* TypeArrayFromTupleDescriptor decided to skip this column */
continue;
}
else if (binaryFormat)

@@ -2493,8 +2497,8 @@ ProcessCopyStmt(CopyStmt *copyStatement, char *completionTag, const char *queryS

int64 tuplesSent = WorkerExecuteSqlTask(query, filename, binaryCopyFormat);

snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
"COPY " UINT64_FORMAT, tuplesSent);
SafeSnprintf(completionTag, COMPLETION_TAG_BUFSIZE,
"COPY " UINT64_FORMAT, tuplesSent);

return NULL;
}
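The TypeArrayFromTupleDescriptor change above extends an existing convention: any column the COPY stream will not carry (dropped columns, and on PG 12+ stored generated columns) is marked InvalidOid, and every downstream consumer — ColumnCoercionPaths, TypeOutputFunctions — treats InvalidOid as "skip this column". A simplified standalone sketch of that convention; ColumnMeta and its fields are illustrative stand-ins, not the PostgreSQL catalog structs:

#include <stdbool.h>

typedef unsigned int Oid;
#define InvalidOid ((Oid) 0)

typedef struct ColumnMeta
{
	Oid typeId;
	bool isDropped;
	bool isGeneratedStored;
} ColumnMeta;

static void
FillTypeArray(const ColumnMeta *columns, int columnCount, Oid *typeArray)
{
	for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		const ColumnMeta *attr = &columns[columnIndex];
		if (attr->isDropped || attr->isGeneratedStored)
		{
			/* incoming tuples will not carry a value for this column */
			typeArray[columnIndex] = InvalidOid;
		}
		else
		{
			typeArray[columnIndex] = attr->typeId;
		}
	}
}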
@@ -158,7 +158,7 @@ PostprocessCreateTableStmtPartitionOf(CreateStmt *createStatement, const
bool missingOk = false;
Oid relationId = RangeVarGetRelid(createStatement->relation, NoLock,
missingOk);
Var *parentDistributionColumn = DistPartitionKey(parentRelationId);
Var *parentDistributionColumn = ForceDistPartitionKey(parentRelationId);
char parentDistributionMethod = DISTRIBUTE_BY_HASH;
char *parentRelationName = generate_qualified_relation_name(parentRelationId);
bool viaDeprecatedAPI = false;

@@ -237,7 +237,7 @@ PostprocessAlterTableStmtAttachPartition(AlterTableStmt *alterTableStatement,
if (IsDistributedTable(relationId) &&
!IsDistributedTable(partitionRelationId))
{
Var *distributionColumn = DistPartitionKey(relationId);
Var *distributionColumn = ForceDistPartitionKey(relationId);
char distributionMethod = DISTRIBUTE_BY_HASH;
char *parentRelationName = generate_qualified_relation_name(relationId);
bool viaDeprecatedAPI = false;

@@ -264,7 +264,10 @@ PostprocessAlterTableSchemaStmt(Node *node, const char *queryString)
AlterObjectSchemaStmt *stmt = castNode(AlterObjectSchemaStmt, node);
Assert(stmt->objectType == OBJECT_TABLE);

ObjectAddress tableAddress = GetObjectAddressFromParseTree((Node *) stmt, false);
/*
* We will let Postgres deal with missing_ok
*/
ObjectAddress tableAddress = GetObjectAddressFromParseTree((Node *) stmt, true);

if (!ShouldPropagate() || !IsDistributedTable(tableAddress.objectId))
{

@@ -884,6 +887,12 @@ ErrorIfUnsupportedConstraint(Relation relation, char distributionMethod,
return;
}

if (distributionColumn == NULL)
{
ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
errmsg("distribution column of distributed table is NULL")));
}

char *relationName = RelationGetRelationName(relation);
List *indexOidList = RelationGetIndexList(relation);

@@ -1473,7 +1482,7 @@ AlterTableSchemaStmtObjectAddress(Node *node, bool missing_ok)
if (stmt->relation->schemaname)
{
const char *schemaName = stmt->relation->schemaname;
Oid schemaOid = get_namespace_oid(schemaName, false);
Oid schemaOid = get_namespace_oid(schemaName, missing_ok);
tableOid = get_relname_relid(tableName, schemaOid);
}
else
@@ -50,6 +50,7 @@
#include "catalog/pg_enum.h"
#include "catalog/pg_type.h"
#include "commands/extension.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/commands.h"
#include "distributed/commands/utility_hook.h"
#include "distributed/deparser.h"

@@ -1075,16 +1076,17 @@ GenerateBackupNameForTypeCollision(const ObjectAddress *address)

while (true)
{
int suffixLength = snprintf(suffix, NAMEDATALEN - 1, "(citus_backup_%d)",
count);
int suffixLength = SafeSnprintf(suffix, NAMEDATALEN - 1, "(citus_backup_%d)",
count);

/* trim the base name at the end to leave space for the suffix and trailing \0 */
baseLength = Min(baseLength, NAMEDATALEN - suffixLength - 1);

/* clear newName before copying the potentially trimmed baseName and suffix */
memset(newName, 0, NAMEDATALEN);
strncpy(newName, baseName, baseLength);
strncpy(newName + baseLength, suffix, suffixLength);
strncpy_s(newName, NAMEDATALEN, baseName, baseLength);
strncpy_s(newName + baseLength, NAMEDATALEN - baseLength, suffix,
suffixLength);

rel->relname = newName;
TypeName *newTypeName = makeTypeNameFromNameList(MakeNameListFromRangeVar(rel));
@@ -334,15 +334,15 @@ multi_ProcessUtility(PlannedStmt *pstmt,
parsetree = copyObject(parsetree);
parsetree = ProcessCopyStmt((CopyStmt *) parsetree, completionTag, queryString);

MemoryContext previousContext = MemoryContextSwitchTo(planContext);
parsetree = copyObject(parsetree);
MemoryContextSwitchTo(previousContext);

if (parsetree == NULL)
{
return;
}

MemoryContext previousContext = MemoryContextSwitchTo(planContext);
parsetree = copyObject(parsetree);
MemoryContextSwitchTo(previousContext);

/*
* we need to set the parsetree here already as we copy and replace the original
* parsetree during ddl propagation. In reality we need to refactor the code above
@@ -127,7 +127,8 @@ PostprocessVariableSetStmt(VariableSetStmt *setStmt, const char *setStmtString)
/* haven't seen any SET stmts so far in this (sub-)xact: initialize StringInfo */
if (activeSetStmts == NULL)
{
MemoryContext old_context = MemoryContextSwitchTo(CurTransactionContext);
/* see comments in PushSubXact on why we allocate this in TopTransactionContext */
MemoryContext old_context = MemoryContextSwitchTo(TopTransactionContext);
activeSetStmts = makeStringInfo();
MemoryContextSwitchTo(old_context);
}
@@ -10,6 +10,7 @@

#include "postgres.h"

#include "distributed/citus_safe_lib.h"
#include "distributed/connection_management.h"
#include "distributed/metadata_cache.h"
#include "distributed/worker_manager.h"

@@ -76,7 +77,7 @@ InitConnParams()
void
ResetConnParams()
{
for (Index paramIdx = 0; paramIdx < ConnParams.size; paramIdx++)
for (Size paramIdx = 0; paramIdx < ConnParams.size; paramIdx++)
{
free((void *) ConnParams.keywords[paramIdx]);
free((void *) ConnParams.values[paramIdx]);

@@ -135,7 +136,6 @@ CheckConninfo(const char *conninfo, const char **whitelist,
Size whitelistLength, char **errorMsg)
{
PQconninfoOption *option = NULL;
Index whitelistIdx PG_USED_FOR_ASSERTS_ONLY = 0;
char *errorMsgString = NULL;

/*

@@ -174,7 +174,7 @@ CheckConninfo(const char *conninfo, const char **whitelist,
#ifdef USE_ASSERT_CHECKING

/* verify that the whitelist is in ascending order */
for (whitelistIdx = 1; whitelistIdx < whitelistLength; whitelistIdx++)
for (Size whitelistIdx = 1; whitelistIdx < whitelistLength; whitelistIdx++)
{
const char *prev = whitelist[whitelistIdx - 1];
const char *curr = whitelist[whitelistIdx];

@@ -190,8 +190,8 @@ CheckConninfo(const char *conninfo, const char **whitelist,
continue;
}

void *matchingKeyword = bsearch(&option->keyword, whitelist, whitelistLength,
sizeof(char *), pg_qsort_strcmp);
void *matchingKeyword = SafeBsearch(&option->keyword, whitelist, whitelistLength,
sizeof(char *), pg_qsort_strcmp);
if (matchingKeyword == NULL)
{
/* the whitelist lacks this keyword; error out! */

@@ -290,7 +290,7 @@ GetConnParams(ConnectionHashKey *key, char ***keywords, char ***values,
pg_ltoa(key->port, nodePortString); /* populate node port string with port */

/* first step: copy global parameters to beginning of array */
for (Index paramIndex = 0; paramIndex < ConnParams.size; paramIndex++)
for (Size paramIndex = 0; paramIndex < ConnParams.size; paramIndex++)
{
/* copy the keyword&value pointers to the new array */
connKeywords[paramIndex] = ConnParams.keywords[paramIndex];

@@ -328,7 +328,7 @@ GetConnParams(ConnectionHashKey *key, char ***keywords, char ***values,
const char *
GetConnParam(const char *keyword)
{
for (Index i = 0; i < ConnParams.size; i++)
for (Size i = 0; i < ConnParams.size; i++)
{
if (strcmp(keyword, ConnParams.keywords[i]) == 0)
{
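The CheckConninfo hunks above swap raw bsearch for SafeBsearch (presumably a checked wrapper from citus_safe_lib, since raw bsearch is on the banned list) but keep the same lookup shape: a conninfo keyword is accepted only if binary search over the sorted whitelist finds it. A standalone sketch with plain bsearch; the comparator here is a stand-in for pg_qsort_strcmp, which likewise dereferences both element pointers:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* bsearch passes pointers to array elements, so dereference both sides */
static int
strcmp_deref(const void *a, const void *b)
{
	return strcmp(*(const char *const *) a, *(const char *const *) b);
}

int
main(void)
{
	const char *whitelist[] = { "host", "port", "sslmode" }; /* must be sorted */
	const char *keyword = "sslmode";

	void *match = bsearch(&keyword, whitelist,
						  sizeof(whitelist) / sizeof(whitelist[0]),
						  sizeof(char *), strcmp_deref);
	printf("%s\n", match != NULL ? "allowed" : "rejected");
	return 0;
}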
@@ -15,6 +15,8 @@

#include "miscadmin.h"

#include "safe_lib.h"

#include "access/hash.h"
#include "commands/dbcommands.h"
#include "distributed/connection_management.h"

@@ -108,7 +110,8 @@ InitializeConnectionManagement(void)
info.hcxt = ConnectionContext;
uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT | HASH_COMPARE);

memcpy(&connParamsInfo, &info, sizeof(HASHCTL));
/* connParamsInfo is same as info, except for entrysize */
connParamsInfo = info;
connParamsInfo.entrysize = sizeof(ConnParamsHashEntry);

ConnectionHash = hash_create("citus connection cache (host,port,user,database)",

@@ -368,6 +371,16 @@ FindAvailableConnection(dlist_head *connections, uint32 flags)
continue;
}

if (connection->forceCloseAtTransactionEnd)
{
/*
* This is a connection that should be closed, probably because
* of old connection options. So we ignore it. It will
* automatically be closed at the end of the transaction.
*/
continue;
}

if ((flags & REQUIRE_SIDECHANNEL) != 0)
{
if (connection->purpose == CONNECTION_PURPOSE_SIDECHANNEL ||

@@ -418,6 +431,37 @@ GivePurposeToConnection(MultiConnection *connection, int flags)
}


/*
* CloseAllConnectionsAfterTransaction sets the forceClose flag of all the
* connections. This is mainly done when citus.node_conninfo changes.
*/
void
CloseAllConnectionsAfterTransaction(void)
{
if (ConnectionHash == NULL)
{
return;
}
HASH_SEQ_STATUS status;
ConnectionHashEntry *entry;

hash_seq_init(&status, ConnectionHash);
while ((entry = (ConnectionHashEntry *) hash_seq_search(&status)) != 0)
{
dlist_iter iter;

dlist_head *connections = entry->connections;
dlist_foreach(iter, connections)
{
MultiConnection *connection =
dlist_container(MultiConnection, connectionNode, iter.cur);

connection->forceCloseAtTransactionEnd = true;
}
}
}


/*
* CloseNodeConnectionsAfterTransaction sets the forceClose flag of the connections
* to a particular node as true such that the connections are no longer cached. This

@@ -1107,7 +1151,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit)
/*
* ShouldShutdownConnection returns true if any one of the following is true:
* - The connection is citus initiated.
* - Current cached connections is already at MaxCachedConnectionPerWorker
* - Current cached connections is already at MaxCachedConnectionsPerWorker
* - Connection is forced to close at the end of transaction
* - Connection is not in OK state
* - A transaction is still in progress (usually because we are cancelling a distributed transaction)

@@ -1212,7 +1256,8 @@ DefaultCitusNoticeProcessor(void *arg, const char *message)
char *nodeName = connection->hostname;
uint32 nodePort = connection->port;
char *trimmedMessage = TrimLogLevel(message);
char *level = strtok((char *) message, ":");
char *strtokPosition;
char *level = strtok_r((char *) message, ":", &strtokPosition);

ereport(CitusNoticeLogLevel,
(errmsg("%s", ApplyLogRedaction(trimmedMessage)),
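The DefaultCitusNoticeProcessor change above is the classic strtok-to-strtok_r migration: strtok keeps hidden static state (and sits on the banned list earlier in this diff), while strtok_r threads its position through an explicit pointer, making nested or concurrent tokenizing safe. A minimal standalone illustration with an invented log string:

#include <stdio.h>
#include <string.h>

int
main(void)
{
	char message[] = "WARNING:  connection to node failed";
	char *strtokPosition = NULL;

	/* grab the log level, i.e. everything before the first ':' */
	char *level = strtok_r(message, ":", &strtokPosition);
	printf("level = %s\n", level); /* prints "WARNING" */
	return 0;
}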
@@ -524,7 +524,13 @@ pg_get_tablecolumnoptionsdef_string(Oid tableRelationId)
*/
TupleDesc tupleDescriptor = RelationGetDescr(relation);

for (AttrNumber attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
if (tupleDescriptor->natts > MaxAttrNumber)
{
ereport(ERROR, (errmsg("bad number of tuple descriptor attributes")));
}

for (AttrNumber attributeIndex = 0;
attributeIndex < (AttrNumber) tupleDescriptor->natts;
attributeIndex++)
{
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
@@ -137,7 +137,7 @@ AppendAlterTypeStmt(StringInfo buf, AlterTableStmt *stmt)
stmt->relation->relname);
ListCell *cmdCell = NULL;

Assert(stmt->relkind = OBJECT_TYPE);
Assert(stmt->relkind == OBJECT_TYPE);

appendStringInfo(buf, "ALTER TYPE %s", identifier);
foreach(cmdCell, stmt->cmds)
@@ -967,6 +967,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
int ncolumns;
char **real_colnames;
bool changed_any;
bool has_anonymous;
int noldcolumns;
int i;
int j;

@@ -1054,6 +1055,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
*/
noldcolumns = list_length(rte->eref->colnames);
changed_any = false;
has_anonymous = false;
j = 0;
for (i = 0; i < ncolumns; i++)
{

@@ -1091,6 +1093,13 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
/* Remember if any assigned aliases differ from "real" name */
if (!changed_any && strcmp(colname, real_colname) != 0)
changed_any = true;

/*
* Remember if there is a reference to an anonymous column as named by
* char * FigureColname(Node *node)
*/
if (!has_anonymous && strcmp(real_colname, "?column?") == 0)
has_anonymous = true;
}

/*

@@ -1120,7 +1129,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
else if (rte->alias && rte->alias->colnames != NIL)
colinfo->printaliases = true;
else
colinfo->printaliases = changed_any;
colinfo->printaliases = changed_any || has_anonymous;
}

/*

@@ -4419,9 +4428,20 @@ get_parameter(Param *param, deparse_context *context)
}

/*
* Not PARAM_EXEC, or couldn't find referent: just print $N.
* Not PARAM_EXEC, or couldn't find referent: for base types just print $N.
* For composite types, add a cast to the parameter so the remote node can
* detect the type.
*/
appendStringInfo(context->buf, "$%d", param->paramid);
if (param->paramtype >= FirstNormalObjectId)
{
char *typeName = format_type_with_typemod(param->paramtype, param->paramtypmod);

appendStringInfo(context->buf, "$%d::%s", param->paramid, typeName);
}
else
{
appendStringInfo(context->buf, "$%d", param->paramid);
}
}

/*
@@ -968,6 +968,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
int ncolumns;
char **real_colnames;
bool changed_any;
bool has_anonymous;
int noldcolumns;
int i;
int j;

@@ -1055,6 +1056,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
*/
noldcolumns = list_length(rte->eref->colnames);
changed_any = false;
has_anonymous = false;
j = 0;
for (i = 0; i < ncolumns; i++)
{

@@ -1092,6 +1094,13 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
/* Remember if any assigned aliases differ from "real" name */
if (!changed_any && strcmp(colname, real_colname) != 0)
changed_any = true;

/*
* Remember if there is a reference to an anonymous column as named by
* char * FigureColname(Node *node)
*/
if (!has_anonymous && strcmp(real_colname, "?column?") == 0)
has_anonymous = true;
}

/*

@@ -1121,7 +1130,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
else if (rte->alias && rte->alias->colnames != NIL)
colinfo->printaliases = true;
else
colinfo->printaliases = changed_any;
colinfo->printaliases = changed_any || has_anonymous;
}

/*

@@ -4433,9 +4442,20 @@ get_parameter(Param *param, deparse_context *context)
}

/*
* Not PARAM_EXEC, or couldn't find referent: for base types just print $N.
* For composite types, add a cast to the parameter so the remote node can
* detect the type.
*/
appendStringInfo(context->buf, "$%d", param->paramid);
if (param->paramtype >= FirstNormalObjectId)
{
char *typeName = format_type_with_typemod(param->paramtype, param->paramtypmod);

appendStringInfo(context->buf, "$%d::%s", param->paramid, typeName);
}
else
{
appendStringInfo(context->buf, "$%d", param->paramid);
}
}

/*
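Both ruleutils copies get the same get_parameter change: parameters whose type OID is at or above FirstNormalObjectId (16384, the first OID PostgreSQL assigns to user-defined objects) are deparsed with an explicit cast, since the remote node cannot always infer a user-defined composite type from a bare $N. A simplified standalone sketch of that branch; the helper name and its signature are illustrative, not the real deparser API:

#include <stdio.h>

#define FirstNormalObjectId 16384

static void
AppendParam(char *buf, size_t bufSize, int paramId, unsigned int paramType,
			const char *typeName)
{
	if (paramType >= FirstNormalObjectId)
	{
		/* user-defined type: emit an explicit cast, e.g. "$1::my_composite" */
		snprintf(buf, bufSize, "$%d::%s", paramId, typeName);
	}
	else
	{
		/* built-in type: a plain "$1" is unambiguous */
		snprintf(buf, bufSize, "$%d", paramId);
	}
}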
@@ -135,6 +135,7 @@
#include "distributed/adaptive_executor.h"
#include "distributed/cancel_utils.h"
#include "distributed/citus_custom_scan.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/connection_management.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/distributed_execution_locks.h"

@@ -982,11 +983,13 @@ static TransactionProperties
DecideTransactionPropertiesForTaskList(RowModifyLevel modLevel, List *taskList, bool
exludeFromTransaction)
{
TransactionProperties xactProperties = {
.errorOnAnyFailure = false,
.useRemoteTransactionBlocks = TRANSACTION_BLOCKS_ALLOWED,
.requires2PC = false
};
TransactionProperties xactProperties;

/* ensure uninitialized padding doesn't escape the function */
memset_struct_0(xactProperties);
xactProperties.errorOnAnyFailure = false;
xactProperties.useRemoteTransactionBlocks = TRANSACTION_BLOCKS_ALLOWED;
xactProperties.requires2PC = false;

if (taskList == NIL)
{

@@ -3151,7 +3154,7 @@ StartPlacementExecutionOnSession(TaskPlacementExecution *placementExecution,
session->currentTask = placementExecution;
placementExecution->executionState = PLACEMENT_EXECUTION_RUNNING;

if (paramListInfo != NULL)
if (paramListInfo != NULL && !task->parametersInQueryStringResolved)
{
int parameterCount = paramListInfo->numParams;
Oid *parameterTypes = NULL;
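The DecideTransactionPropertiesForTaskList hunk above trades a designated initializer for memset-then-assign: an initializer zeroes every member but leaves padding bytes indeterminate, which can misbehave under certain compile flags when the struct is copied or compared (this lines up with the changelog entry about crashes with certain compile flags). A standalone sketch, assuming memset_struct_0 is a thin memset wrapper as defined below — the struct layout here is illustrative, not the real Citus definition:

#include <stdbool.h>
#include <string.h>

#define memset_struct_0(variable) memset(&(variable), 0, sizeof(variable))

typedef struct Properties
{
	bool errorOnAnyFailure;   /* padding bytes typically follow on most ABIs */
	int useRemoteTransactionBlocks;
	bool requires2PC;
} Properties;

void
InitProperties(Properties *props)
{
	Properties xactProperties;

	memset_struct_0(xactProperties);  /* zeroes every byte, padding included */
	xactProperties.errorOnAnyFailure = false;
	xactProperties.useRemoteTransactionBlocks = 1;
	xactProperties.requires2PC = false;

	*props = xactProperties;
}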
@@ -16,6 +16,7 @@
#include "distributed/citus_clauses.h"
#include "distributed/citus_custom_scan.h"
#include "distributed/citus_nodefuncs.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/distributed_execution_locks.h"
#include "distributed/insert_select_executor.h"

@@ -47,16 +48,16 @@ static Node * DelayedErrorCreateScan(CustomScan *scan);

/* functions that are common to different scans */
static void CitusBeginScan(CustomScanState *node, EState *estate, int eflags);
static void CitusBeginScanWithCoordinatorProcessing(CustomScanState *node, EState *estate,
int eflags);
static void HandleDeferredShardPruningForFastPathQueries(
DistributedPlan *distributedPlan);
static void HandleDeferredShardPruningForInserts(DistributedPlan *distributedPlan);
static void CacheLocalPlanForTask(Task *task, DistributedPlan *originalDistributedPlan);
static DistributedPlan * CopyDistributedPlanWithoutCache(CitusScanState *scanState);
static void ResetExecutionParameters(EState *executorState);
static void CitusBeginScanWithoutCoordinatorProcessing(CustomScanState *node,
EState *estate, int eflags);
static void CitusBeginSelectScan(CustomScanState *node, EState *estate, int eflags);
static void CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags);
static void RegenerateTaskForFasthPathQuery(Job *workerJob);
static void RegenerateTaskListForInsert(Job *workerJob);
static void CacheLocalPlanForShardQuery(Task *task,
DistributedPlan *originalDistributedPlan);
static bool IsLocalPlanCachingSupported(Job *workerJob,
DistributedPlan *originalDistributedPlan);
static DistributedPlan * CopyDistributedPlanWithoutCache(
DistributedPlan *originalDistributedPlan);
static void CitusEndScan(CustomScanState *node);
static void CitusReScan(CustomScanState *node);

@@ -145,16 +146,22 @@ CitusBeginScan(CustomScanState *node, EState *estate, int eflags)
#endif

DistributedPlan *distributedPlan = scanState->distributedPlan;
Job *workerJob = distributedPlan->workerJob;
if (workerJob &&
(workerJob->requiresMasterEvaluation || workerJob->deferredPruning))
if (distributedPlan->insertSelectQuery != NULL)
{
CitusBeginScanWithCoordinatorProcessing(node, estate, eflags);

/*
* INSERT..SELECT via coordinator or re-partitioning are special because
* the SELECT part is planned separately.
*/
return;
}

CitusBeginScanWithoutCoordinatorProcessing(node, estate, eflags);
else if (distributedPlan->modLevel == ROW_MODIFY_READONLY)
{
CitusBeginSelectScan(node, estate, eflags);
}
else
{
CitusBeginModifyScan(node, estate, eflags);
}
}

@@ -183,179 +190,176 @@ CitusExecScan(CustomScanState *node)

/*
* CitusBeginScanWithoutCoordinatorProcessing is intended to work on all executions
* that do not require any coordinator processing. The function simply acquires the
* necessary locks on the shards involved in the task list of the distributed plan
* and does the placement assignments. This implies that the function is a no-op for
* SELECT queries as they do not require any locking and placement assignments.
* CitusBeginSelectScan handles deferred pruning and plan caching for SELECTs.
*/
static void
CitusBeginScanWithoutCoordinatorProcessing(CustomScanState *node, EState *estate, int
eflags)
CitusBeginSelectScan(CustomScanState *node, EState *estate, int eflags)
{
CitusScanState *scanState = (CitusScanState *) node;
DistributedPlan *distributedPlan = scanState->distributedPlan;
DistributedPlan *originalDistributedPlan = scanState->distributedPlan;

if (distributedPlan->modLevel == ROW_MODIFY_READONLY ||
distributedPlan->insertSelectQuery != NULL)
if (!originalDistributedPlan->workerJob->deferredPruning)
{
/*
* For SELECT queries that have already been pruned we can proceed straight
* to execution, since none of the prepared statement logic applies.
*/
return;
}

/* we'll be modifying the distributed plan by assigning taskList, do it on a copy */
distributedPlan = copyObject(distributedPlan);
scanState->distributedPlan = distributedPlan;
/*
* Create a copy of the generic plan for the current execution, but make a shallow
* copy of the plan cache. That means we'll be able to access the plan cache via
* currentPlan->workerJob->localPlannedStatements, but it will be preserved across
* executions by the prepared statement logic.
*/
DistributedPlan *currentPlan =
CopyDistributedPlanWithoutCache(originalDistributedPlan);
scanState->distributedPlan = currentPlan;

Job *workerJob = distributedPlan->workerJob;
List *taskList = workerJob->taskList;
Job *workerJob = currentPlan->workerJob;
Query *jobQuery = workerJob->jobQuery;
PlanState *planState = &(scanState->customScanState.ss.ps);

/*
* These more complex jobs should have been evaluated in
* CitusBeginScanWithCoordinatorProcessing.
* We only do deferred pruning for fast path queries, which have a single
* partition column value.
*/
Assert(!(workerJob->requiresMasterEvaluation || workerJob->deferredPruning));
Assert(currentPlan->fastPathRouterPlan || !EnableFastPathRouterPlanner);

/* prevent concurrent placement changes */
AcquireMetadataLocks(taskList);
/*
* Evaluate parameters, because the parameters are only available on the
* coordinator and are required for pruning.
*
* We don't evaluate functions for read-only queries on the coordinator
* at the moment. Most function calls would be in a context where they
* should be re-evaluated for every row in case of volatile functions.
*
* TODO: evaluate stable functions
*/
ExecuteMasterEvaluableParameters(jobQuery, planState);

/* modify tasks are always assigned using first-replica policy */
workerJob->taskList = FirstReplicaAssignTaskList(taskList);
/* job query no longer has parameters, so we should not send any */
workerJob->parametersInJobQueryResolved = true;

/* parameters are filled in, so we can generate a task for this execution */
RegenerateTaskForFasthPathQuery(workerJob);

if (IsLocalPlanCachingSupported(workerJob, originalDistributedPlan))
{
Task *task = linitial(workerJob->taskList);

/*
* We are going to execute this task locally. If it's not already in
* the cache, create a local plan now and add it to the cache. During
* execution, we will get the plan from the cache.
*
* The plan will be cached across executions when originalDistributedPlan
* represents a prepared statement.
*/
CacheLocalPlanForShardQuery(task, originalDistributedPlan);
}
}

/*
* CitusBeginScanWithCoordinatorProcessing generates query strings at the start of the execution
* in two cases: when the query requires master evaluation and/or deferred shard pruning.
* CitusBeginModifyScan prepares the scan state for a modification.
*
* The function is also smart about caching plans if the plan is local to this node.
* Modifications are special because:
* a) we evaluate function calls (e.g. nextval) here and the outcome may
* determine which shards are affected by this query.
* b) we need to take metadata locks to make sure no write is left behind
* when finalizing a shard move.
*/
static void
CitusBeginScanWithCoordinatorProcessing(CustomScanState *node, EState *estate, int eflags)
CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags)
{
CitusScanState *scanState = (CitusScanState *) node;
DistributedPlan *originalDistributedPlan = scanState->distributedPlan;
DistributedPlan *distributedPlan = CopyDistributedPlanWithoutCache(scanState);
Job *workerJob = distributedPlan->workerJob;
Query *jobQuery = workerJob->jobQuery;

/* we'd only get to this function with the following conditions */
Assert(workerJob->requiresMasterEvaluation || workerJob->deferredPruning);

PlanState *planState = &(scanState->customScanState.ss.ps);
DistributedPlan *originalDistributedPlan = scanState->distributedPlan;

/* citus only evaluates functions for modification queries */
bool modifyQueryRequiresMasterEvaluation =
jobQuery->commandType != CMD_SELECT &&
(workerJob->requiresMasterEvaluation || workerJob->deferredPruning);
DistributedPlan *currentPlan =
CopyDistributedPlanWithoutCache(originalDistributedPlan);
scanState->distributedPlan = currentPlan;

/*
* ExecuteMasterEvaluableFunctions handles both function evaluation
* and parameter evaluation. Pruning is most likely deferred because
* there is a parameter on the distribution key. So, evaluate in both
* cases.
*/
if (modifyQueryRequiresMasterEvaluation)
Job *workerJob = currentPlan->workerJob;
Query *jobQuery = workerJob->jobQuery;
bool evaluateAllExpressions = workerJob->requiresMasterEvaluation ||
workerJob->deferredPruning;

if (evaluateAllExpressions)
{
/* evaluate functions and parameters for modification queries */
/* evaluate both functions and parameters */
ExecuteMasterEvaluableFunctionsAndParameters(jobQuery, planState);
}
else if (jobQuery->commandType == CMD_SELECT && !workerJob->deferredPruning)
{
/* we'll use generated strings, no need to have the parameters anymore */
EState *executorState = planState->state;
ResetExecutionParameters(executorState);

/* we're done, we don't want to evaluate functions for SELECT queries */
return;
/* job query no longer has parameters, so we should not send any */
workerJob->parametersInJobQueryResolved = true;
}
else if (jobQuery->commandType == CMD_SELECT && workerJob->deferredPruning)

if (workerJob->deferredPruning)
{
/*
* Evaluate parameters, because the parameters are only available on the
* coordinator and are required for pruning.
* At this point, we're about to do the shard pruning for fast-path queries.
* Given that pruning is always deferred for INSERTs, we can get here
* with !EnableFastPathRouterPlanner as well.
*/
Assert(currentPlan->fastPathRouterPlan || !EnableFastPathRouterPlanner);

/*
* We can only now decide which shard to use, so we need to build a new task
* list.
*/
if (jobQuery->commandType == CMD_INSERT)
{
RegenerateTaskListForInsert(workerJob);
}
else
{
RegenerateTaskForFasthPathQuery(workerJob);
}
}
else if (workerJob->requiresMasterEvaluation)
{
/*
* When there is no deferred pruning, but we did evaluate functions, then
* we only rebuild the query strings in the existing tasks.
*/
RebuildQueryStrings(workerJob);
}

/*
* Now that we know the shard ID(s) we can acquire the necessary shard metadata
* locks. Once we have the locks it's safe to load the placement metadata.
*/

/* prevent concurrent placement changes */
AcquireMetadataLocks(workerJob->taskList);

/* modify tasks are always assigned using first-replica policy */
workerJob->taskList = FirstReplicaAssignTaskList(workerJob->taskList);

/*
* Now that we have populated the task placements we can determine whether
|
||||
* any of them are local to this node and cache a plan if needed.
|
||||
*/
|
||||
if (IsLocalPlanCachingSupported(workerJob, originalDistributedPlan))
|
||||
{
|
||||
Task *task = linitial(workerJob->taskList);
|
||||
|
||||
/*
|
||||
* We are going to execute this task locally. If it's not already in
|
||||
* the cache, create a local plan now and add it to the cache. During
|
||||
* execution, we will get the plan from the cache.
|
||||
*
|
||||
* But, we don't want to evaluate functions for read-only queries on the
|
||||
* coordinator as the volatile functions could yield different
|
||||
* results per shard (also per row) and could have side-effects.
|
||||
* WARNING: In this function we'll use the original plan with the original
|
||||
* query tree, meaning parameters and function calls are back and we'll
|
||||
* redo evaluation in the local (Postgres) executor. The reason we do this
|
||||
* is that we only need to cache one generic plan per shard.
|
||||
*
|
||||
* Note that Citus already errors out for modification queries during
|
||||
* planning when the query involve any volatile function that might
|
||||
* diverge the shards as such functions are expected to yield different
|
||||
* results per shard (also per row).
|
||||
* The plan will be cached across executions when originalDistributedPlan
|
||||
* represents a prepared statement.
|
||||
*/
|
||||
ExecuteMasterEvaluableParameters(jobQuery, planState);
|
||||
}
|
||||
|
||||
/*
|
||||
* After evaluating the function/parameters, we're done unless shard pruning
|
||||
* is also deferred.
|
||||
*/
|
||||
if (workerJob->requiresMasterEvaluation && !workerJob->deferredPruning)
|
||||
{
|
||||
RebuildQueryStrings(workerJob->jobQuery, workerJob->taskList);
|
||||
|
||||
/* we'll use generated strings, no need to have the parameters anymore */
|
||||
EState *executorState = planState->state;
|
||||
ResetExecutionParameters(executorState);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point, we're about to do the shard pruning for fast-path queries.
|
||||
* Given that pruning is deferred always for INSERTs, we get here
|
||||
* !EnableFastPathRouterPlanner as well.
|
||||
*/
|
||||
Assert(workerJob->deferredPruning &&
|
||||
(distributedPlan->fastPathRouterPlan || !EnableFastPathRouterPlanner));
|
||||
if (jobQuery->commandType == CMD_INSERT)
|
||||
{
|
||||
HandleDeferredShardPruningForInserts(distributedPlan);
|
||||
}
|
||||
else
|
||||
{
|
||||
HandleDeferredShardPruningForFastPathQueries(distributedPlan);
|
||||
}
|
||||
|
||||
if (jobQuery->commandType != CMD_SELECT)
|
||||
{
|
||||
/* prevent concurrent placement changes */
|
||||
AcquireMetadataLocks(workerJob->taskList);
|
||||
|
||||
/* modify tasks are always assigned using first-replica policy */
|
||||
workerJob->taskList = FirstReplicaAssignTaskList(workerJob->taskList);
|
||||
}
|
||||
|
||||
if (list_length(distributedPlan->workerJob->taskList) != 1)
|
||||
{
|
||||
/*
|
||||
* We might have zero shard queries or multi-row INSERTs at this point,
|
||||
* we only want to cache single task queries.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* As long as the task accesses local node and the query doesn't have
|
||||
* any volatile functions, we cache the local Postgres plan on the
|
||||
* shard for re-use.
|
||||
*/
|
||||
Task *task = linitial(distributedPlan->workerJob->taskList);
|
||||
if (EnableLocalExecution && TaskAccessesLocalNode(task) &&
|
||||
!contain_volatile_functions(
|
||||
(Node *) originalDistributedPlan->workerJob->jobQuery))
|
||||
{
|
||||
CacheLocalPlanForTask(task, originalDistributedPlan);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* If we're not going to use a cached plan, we'll use the query string that is
|
||||
* already generated where the parameters are replaced, so we should not have
|
||||
* the parameters anymore.
|
||||
*/
|
||||
EState *executorState = planState->state;
|
||||
ResetExecutionParameters(executorState);
|
||||
CacheLocalPlanForShardQuery(task, originalDistributedPlan);
|
||||
}
|
||||
}
|
||||
|
||||
|
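For orientation, the branching above condenses to a small decision procedure: evaluate on the coordinator, regenerate or rebuild tasks, then lock and assign placements. The sketch below is a standalone toy model of that flow; `JobLike` and `BeginModifyScanLike` are invented stand-ins, not the real Citus types.

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the begin-modify-scan decision flow; all types are stand-ins. */
typedef struct JobLike
{
    bool requiresMasterEvaluation;
    bool deferredPruning;
    bool isInsert;
} JobLike;

static void
BeginModifyScanLike(JobLike *job)
{
    if (job->requiresMasterEvaluation || job->deferredPruning)
    {
        puts("evaluate functions and parameters on the coordinator");
    }

    if (job->deferredPruning)
    {
        /* only now is the distribution key value known, so prune now */
        puts(job->isInsert ? "regenerate task list for INSERT"
                           : "regenerate task for fast-path query");
    }
    else if (job->requiresMasterEvaluation)
    {
        puts("rebuild query strings of the existing tasks");
    }

    puts("acquire metadata locks, then assign placements (first replica)");
}

int main(void)
{
    JobLike job = { .requiresMasterEvaluation = true, .deferredPruning = true,
                    .isInsert = false };
    BeginModifyScanLike(&job);
    return 0;
}
```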
@@ -372,15 +376,13 @@ CitusBeginScanWithCoordinatorProcessing(CustomScanState *node, EState *estate, int eflags)
 * reasons, as they are immutable, so no need to have a deep copy.
 */
static DistributedPlan *
CopyDistributedPlanWithoutCache(CitusScanState *scanState)
CopyDistributedPlanWithoutCache(DistributedPlan *originalDistributedPlan)
{
    DistributedPlan *originalDistributedPlan = scanState->distributedPlan;
    List *localPlannedStatements =
        originalDistributedPlan->workerJob->localPlannedStatements;
    originalDistributedPlan->workerJob->localPlannedStatements = NIL;

    DistributedPlan *distributedPlan = copyObject(originalDistributedPlan);
    scanState->distributedPlan = distributedPlan;

    /* set back the immutable field */
    originalDistributedPlan->workerJob->localPlannedStatements = localPlannedStatements;
@@ -391,30 +393,12 @@ CopyDistributedPlanWithoutCache(CitusScanState *scanState)


/*
 * ResetExecutionParameters sets the parameter list to NULL. See the function
 * for details.
 * CacheLocalPlanForShardQuery replaces the relation OIDs in the job query
 * with shard relation OIDs and then plans the query and caches the result
 * in the originalDistributedPlan (which may be preserved across executions).
 */
static void
ResetExecutionParameters(EState *executorState)
{
    /*
     * We've processed parameters in ExecuteMasterEvaluableFunctions and
     * don't need to send their values to workers, since they will be
     * represented as constants in the deparsed query. To avoid sending
     * parameter values, we set the parameter list to NULL.
     */
    executorState->es_param_list_info = NULL;
}


/*
 * CacheLocalPlanForTask caches a plan that is local to this node in the
 * originalDistributedPlan.
 *
 * The basic idea is to be able to skip planning on the shards when possible.
 */
static void
CacheLocalPlanForTask(Task *task, DistributedPlan *originalDistributedPlan)
CacheLocalPlanForShardQuery(Task *task, DistributedPlan *originalDistributedPlan)
{
    PlannedStmt *localPlan = GetCachedLocalPlan(task, originalDistributedPlan);
    if (localPlan != NULL)
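Why dropping `es_param_list_info` is safe here: once the coordinator has folded the parameter values into the deparsed SQL as constants, the original parameters carry no extra information, and sending them anyway can confuse the receiving side. A minimal string-level illustration (the real code folds constants into the query tree, not the text; names below are invented):

```c
#include <stdio.h>
#include <string.h>

/*
 * Toy version of "resolve parameters into the query string": splice a
 * literal in place of the $1 placeholder. After this, shipping the original
 * parameter value alongside the query would be redundant.
 */
int main(void)
{
    const char *template = "SELECT * FROM dist_table WHERE key = $1";
    const char *paramLiteral = "42";
    char resolved[128];

    const char *placeholder = strstr(template, "$1");
    snprintf(resolved, sizeof(resolved), "%.*s%s%s",
             (int) (placeholder - template), template, paramLiteral,
             placeholder + 2);

    printf("%s\n", resolved); /* SELECT * FROM dist_table WHERE key = 42 */
    return 0;
}
```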
@@ -462,6 +446,15 @@ CacheLocalPlanForTask(Task *task, DistributedPlan *originalDistributedPlan)
        return;
    }

    if (IsLoggableLevel(DEBUG5))
    {
        StringInfo queryString = makeStringInfo();
        pg_get_query_def(shardQuery, queryString);

        ereport(DEBUG5, (errmsg("caching plan for query: %s",
                                queryString->data)));
    }

    LockRelationOid(rangeTableEntry->relid, lockMode);

    LocalPlannedStatement *localPlannedStatement = CitusMakeNode(LocalPlannedStatement);
@@ -506,18 +499,86 @@ GetCachedLocalPlan(Task *task, DistributedPlan *distributedPlan)


/*
 * HandleDeferredShardPruningForInserts does the shard pruning for INSERT
 * IsLocalPlanCachingSupported returns whether (part of) the task can be planned
 * and executed locally and whether caching is supported (single shard, no volatile
 * functions).
 */
static bool
IsLocalPlanCachingSupported(Job *currentJob, DistributedPlan *originalDistributedPlan)
{
    if (!currentJob->deferredPruning)
    {
        /*
         * When not using deferred pruning we may have already replaced distributed
         * table RTEs with citus_extradata_container RTEs to pass the shard ID to the
         * deparser. In that case, we cannot pass the query tree directly to the
         * planner.
         *
         * If desired, we can relax this check by improving the implementation of
         * CacheLocalPlanForShardQuery to translate citus_extradata_container
         * to a shard relation OID.
         */
        return false;
    }

    List *taskList = currentJob->taskList;
    if (list_length(taskList) != 1)
    {
        /* we only support plan caching for single shard queries */
        return false;
    }

    Task *task = linitial(taskList);
    if (!TaskAccessesLocalNode(task))
    {
        /* not a local task */
        return false;
    }

    if (!EnableLocalExecution)
    {
        /* user requested not to use local execution */
        return false;
    }

    if (TransactionConnectedToLocalGroup)
    {
        /* transaction already connected to localhost */
        return false;
    }

    Query *originalJobQuery = originalDistributedPlan->workerJob->jobQuery;
    if (contain_volatile_functions((Node *) originalJobQuery))
    {
        /*
         * We do not cache plans with volatile functions in the query.
         *
         * The reason we care about volatile functions is primarily that we
         * already executed them in ExecuteMasterEvaluableFunctionsAndParameters
         * and since we're falling back to the original query tree here we would
         * execute them again if we execute the plan.
         */
        return false;
    }

    return true;
}


/*
 * RegenerateTaskListForInsert does the shard pruning for an INSERT query
 * queries and rebuilds the query strings.
 */
static void
HandleDeferredShardPruningForInserts(DistributedPlan *distributedPlan)
RegenerateTaskListForInsert(Job *workerJob)
{
    Job *workerJob = distributedPlan->workerJob;
    Query *jobQuery = workerJob->jobQuery;
    bool parametersInJobQueryResolved = workerJob->parametersInJobQueryResolved;
    DeferredErrorMessage *planningError = NULL;

    /* need to perform shard pruning, rebuild the task list from scratch */
    List *taskList = RouterInsertTaskList(jobQuery, &planningError);
    List *taskList = RouterInsertTaskList(jobQuery, parametersInJobQueryResolved,
                                          &planningError);

    if (planningError != NULL)
    {

@@ -527,26 +588,22 @@ HandleDeferredShardPruningForInserts(DistributedPlan *distributedPlan)
    workerJob->taskList = taskList;
    workerJob->partitionKeyValue = ExtractInsertPartitionKeyValue(jobQuery);

    RebuildQueryStrings(jobQuery, workerJob->taskList);
    RebuildQueryStrings(workerJob);
}


/*
 * HandleDeferredShardPruningForFastPathQueries does the shard pruning for
 * RegenerateTaskForFasthPathQuery does the shard pruning for
 * UPDATE/DELETE/SELECT fast path router queries and rebuilds the query strings.
 */
static void
HandleDeferredShardPruningForFastPathQueries(DistributedPlan *distributedPlan)
RegenerateTaskForFasthPathQuery(Job *workerJob)
{
    Assert(distributedPlan->fastPathRouterPlan);

    Job *workerJob = distributedPlan->workerJob;

    bool isMultiShardQuery = false;
    List *shardIntervalList =
        TargetShardIntervalForFastPathQuery(workerJob->jobQuery,
                                            &workerJob->partitionKeyValue,
                                            &isMultiShardQuery, NULL);
                                            &isMultiShardQuery, NULL,
                                            &workerJob->partitionKeyValue);

    /*
     * A fast-path router query can only yield multiple shards when the parameter
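Deferred pruning, in essence: only at execution time, once the distribution-key value is known, can the single target shard be picked. A simplified standalone sketch of that pick, assuming evenly sized hash-token ranges (the real implementation goes through PostgreSQL's hash opclasses and the shard interval cache, so this is illustrative only):

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Toy shard pruning: map an already-hashed distribution key value onto one
 * of shardCount evenly sized hash-token ranges.
 */
static int
PruneToShard(int32_t hashedValue, int shardCount)
{
    uint64_t hashTokenIncrement = (UINT32_MAX + UINT64_C(1)) / (uint64_t) shardCount;

    /* shift [INT32_MIN, INT32_MAX] onto [0, 2^32) before dividing */
    uint64_t shifted = (uint64_t) ((int64_t) hashedValue - INT32_MIN);

    return (int) (shifted / hashTokenIncrement);
}

int main(void)
{
    printf("hash 0 -> shard %d of 4\n", PruneToShard(0, 4));               /* 2 */
    printf("hash INT32_MIN -> shard %d of 4\n", PruneToShard(INT32_MIN, 4)); /* 0 */
    return 0;
}
```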
@@ -707,10 +764,11 @@ static void
CitusReScan(CustomScanState *node)
{
    CitusScanState *scanState = (CitusScanState *) node;
    Job *workerJob = scanState->distributedPlan->workerJob;
    EState *executorState = ScanStateGetExecutorState(scanState);
    ParamListInfo paramListInfo = executorState->es_param_list_info;

    if (paramListInfo != NULL)
    if (paramListInfo != NULL && !workerJob->parametersInJobQueryResolved)
    {
        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                        errmsg("Cursors for queries on distributed tables with "
@@ -539,7 +539,7 @@ FragmentTransferTaskList(List *fragmentListTransfers)
    /* these should have already been pruned away in ColocationTransfers */
    Assert(targetNodeId != fragmentsTransfer->nodes.sourceNodeId);

    WorkerNode *workerNode = LookupNodeByNodeId(targetNodeId);
    WorkerNode *workerNode = ForceLookupNodeByNodeId(targetNodeId);

    ShardPlacement *targetPlacement = CitusMakeNode(ShardPlacement);
    targetPlacement->nodeName = workerNode->workerName;

@@ -571,7 +571,7 @@ QueryStringForFragmentsTransfer(NodeToNodeFragmentsTransfer *fragmentsTransfer)
    StringInfo fragmentNamesArrayString = makeStringInfo();
    int fragmentCount = 0;
    NodePair *nodePair = &fragmentsTransfer->nodes;
    WorkerNode *sourceNode = LookupNodeByNodeId(nodePair->sourceNodeId);
    WorkerNode *sourceNode = ForceLookupNodeByNodeId(nodePair->sourceNodeId);

    appendStringInfoString(fragmentNamesArrayString, "ARRAY[");
@@ -178,15 +178,25 @@ ExecuteLocalTaskList(CitusScanState *scanState, List *taskList)
    }
    else
    {
        Query *shardQuery = ParseQueryString(TaskQueryString(task), parameterTypes,
                                             numParams);
        int taskNumParams = numParams;
        Oid *taskParameterTypes = parameterTypes;

        if (task->parametersInQueryStringResolved)
        {
            /*
             * Parameters were removed from the query string so do not pass them
             * here. Otherwise, we might see errors when passing custom types,
             * since their OIDs were set to 0 and their type is normally
             * inferred from
             */
            taskNumParams = 0;
            taskParameterTypes = NULL;
        }

        Query *shardQuery = ParseQueryString(TaskQueryString(task),
                                             taskParameterTypes,
                                             taskNumParams);

        /*
         * We should not consider using CURSOR_OPT_FORCE_DISTRIBUTED in case of
         * intermediate results in the query. That'd trigger ExecuteLocalTaskPlan()
         * to go through the distributed executor, which we do not want since the
         * query is already known to be local.
         */
        int cursorOptions = 0;

        /*
@@ -692,16 +692,17 @@ TrackerHash(const char *taskTrackerHashName, List *workerNodeList, char *userName)
        char *nodeName = workerNode->workerName;
        uint32 nodePort = workerNode->workerPort;

        char taskStateHashName[MAXPGPATH];
        uint32 taskStateCount = 32;
        HASHCTL info;

        /* insert task tracker into the tracker hash */
        TaskTracker *taskTracker = TrackerHashEnter(taskTrackerHash, nodeName, nodePort);

        /* for each task tracker, create hash to track its assigned tasks */
        snprintf(taskStateHashName, MAXPGPATH,
                 "Task Tracker \"%s:%u\" Task State Hash", nodeName, nodePort);
        StringInfo taskStateHashName = makeStringInfo();
        appendStringInfo(taskStateHashName, "Task Tracker \"%s:%u\" Task State Hash",
                         nodeName, nodePort);

        memset(&info, 0, sizeof(info));
        info.keysize = sizeof(uint64) + sizeof(uint32);

@@ -710,12 +711,12 @@ TrackerHash(const char *taskTrackerHashName, List *workerNodeList, char *userName)
        info.hcxt = CurrentMemoryContext;
        int hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

        HTAB *taskStateHash = hash_create(taskStateHashName, taskStateCount, &info,
        HTAB *taskStateHash = hash_create(taskStateHashName->data, taskStateCount, &info,
                                          hashFlags);
        if (taskStateHash == NULL)
        {
            ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
                            errmsg("could not initialize %s", taskStateHashName)));
                            errmsg("could not initialize %s", taskStateHashName->data)));
        }

        taskTracker->taskStateHash = taskStateHash;
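The switch from a fixed `MAXPGPATH` buffer to a `StringInfo` avoids silent truncation of long node names; `snprintf` reports, but does not prevent, the cut-off. A standalone demonstration of the failure mode the change removes:

```c
#include <stdio.h>

/* A fixed buffer silently truncates; snprintf's return value is the only hint. */
int main(void)
{
    char fixed[16]; /* deliberately too small, standing in for MAXPGPATH misuse */
    int needed = snprintf(fixed, sizeof(fixed),
                          "Task Tracker \"%s:%u\" Task State Hash",
                          "worker-1.example.com", 5432u);

    if (needed >= (int) sizeof(fixed))
    {
        printf("truncated to \"%s\" (%d bytes were needed)\n", fixed, needed + 1);
    }
    return 0;
}
```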
@@ -781,7 +782,7 @@ TrackerHashEnter(HTAB *taskTrackerHash, char *nodeName, uint32 nodePort)
    }

    /* init task tracker object with zeroed out task tracker key */
    memcpy(taskTracker, &taskTrackerKey, sizeof(TaskTracker));
    *taskTracker = taskTrackerKey;
    taskTracker->trackerStatus = TRACKER_CONNECT_START;
    taskTracker->connectionId = INVALID_CONNECTION_ID;
    taskTracker->currentTaskIndex = -1;
@@ -83,7 +83,8 @@ CreateTemporarySchemasForMergeTasks(Job *topLeveLJob)
{
    List *jobIds = ExtractJobsInJobTree(topLeveLJob);
    char *createSchemasCommand = GenerateCreateSchemasCommand(jobIds, CurrentUserName());
    SendCommandToAllWorkers(createSchemasCommand, CitusExtensionOwnerName());
    SendCommandToWorkersInParallel(ALL_WORKERS, createSchemasCommand,
                                   CitusExtensionOwnerName());
    return jobIds;
}

@@ -167,9 +168,8 @@ GenerateJobCommands(List *jobIds, char *templateCommand)
void
DoRepartitionCleanup(List *jobIds)
{
    SendOptionalCommandListToAllWorkers(list_make1(GenerateDeleteJobsCommand(
                                            jobIds)),
                                        CitusExtensionOwnerName());
    SendCommandToWorkersOptionalInParallel(ALL_WORKERS, GenerateDeleteJobsCommand(jobIds),
                                           CitusExtensionOwnerName());
}
@@ -198,14 +198,14 @@ CreateShardsWithRoundRobinPolicy(Oid distributedTableId, int32 shardCount,
        uint32 roundRobinNodeIndex = shardIndex % workerNodeCount;

        /* initialize the hash token space for this shard */
        int32 shardMinHashToken = INT32_MIN + (shardIndex * hashTokenIncrement);
        int32 shardMinHashToken = PG_INT32_MIN + (shardIndex * hashTokenIncrement);
        int32 shardMaxHashToken = shardMinHashToken + (hashTokenIncrement - 1);
        uint64 shardId = GetNextShardId();

        /* if we are at the last shard, make sure the max token value is INT_MAX */
        if (shardIndex == (shardCount - 1))
        {
            shardMaxHashToken = INT32_MAX;
            shardMaxHashToken = PG_INT32_MAX;
        }

        /* insert the shard metadata row along with its min/max values */
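The token-range arithmetic above, as a standalone program using the `<stdint.h>` limits; `PG_INT32_MIN`/`PG_INT32_MAX` are PostgreSQL's portable equivalents from `c.h`. Note the widening to 64 bits before the multiply, since `shardIndex * hashTokenIncrement` can exceed `INT32_MAX` and signed overflow is undefined behavior:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int shardCount = 4;

    /* 2^32 hash tokens, spread evenly across the shards */
    uint64_t hashTokenIncrement = (UINT32_MAX + UINT64_C(1)) / (uint64_t) shardCount;

    for (int shardIndex = 0; shardIndex < shardCount; shardIndex++)
    {
        int64_t minToken = (int64_t) INT32_MIN +
                           (int64_t) shardIndex * (int64_t) hashTokenIncrement;
        int64_t maxToken = minToken + (int64_t) hashTokenIncrement - 1;

        if (shardIndex == shardCount - 1)
        {
            maxToken = INT32_MAX; /* last range must end at the very top */
        }

        printf("shard %d: [%lld, %lld]\n", shardIndex,
               (long long) minToken, (long long) maxToken);
    }
    return 0;
}
```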
@@ -162,7 +162,7 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
    else if (partitionMethod == DISTRIBUTE_BY_NONE)
    {
        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                        errmsg("cannot delete from distributed table"),
                        errmsg("cannot delete from reference table"),
                        errdetail("Delete statements on reference tables "
                                  "are not supported.")));
    }

@@ -521,7 +521,7 @@ CheckDeleteCriteria(Node *deleteCriteria)
static void
CheckPartitionColumn(Oid relationId, Node *whereClause)
{
    Var *partitionColumn = DistPartitionKey(relationId);
    Var *partitionColumn = ForceDistPartitionKey(relationId);
    ListCell *columnCell = NULL;

    List *columnList = pull_var_clause_default(whereClause);
@@ -31,6 +31,7 @@
#include "distributed/colocation_utils.h"
#include "distributed/connection_management.h"
#include "distributed/citus_nodes.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/listutils.h"
#include "distributed/master_metadata_utility.h"
#include "distributed/master_protocol.h"

@@ -230,7 +231,7 @@ DistributedTableSizeOnWorker(WorkerNode *workerNode, Oid relationId, char *sizeQuery)
    List *sizeList = ReadFirstColumnAsText(result);
    StringInfo tableSizeStringInfo = (StringInfo) linitial(sizeList);
    char *tableSizeString = tableSizeStringInfo->data;
    uint64 tableSize = atol(tableSizeString);
    uint64 tableSize = SafeStringToUint64(tableSizeString);

    PQclear(result);
    ClearResults(connection, raiseErrors);
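`atol` returns 0 on garbage, overflows silently, and is signed, so a multi-terabyte size string could come back mangled. A checked conversion fails loudly instead. The sketch below shows the behavior such a helper needs; the actual `SafeStringToUint64` lives in Citus' safe string library and its internals may differ:

```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Illustrative only: parse a uint64 and fail loudly on garbage or overflow. */
static uint64_t
ParseUint64OrDie(const char *s)
{
    char *end = NULL;
    errno = 0;
    unsigned long long value = strtoull(s, &end, 10);

    if (errno != 0 || end == s || *end != '\0')
    {
        fprintf(stderr, "invalid uint64: \"%s\"\n", s);
        exit(1);
    }
    return (uint64_t) value;
}

int main(void)
{
    printf("%llu\n", (unsigned long long) ParseUint64OrDie("8796093022208"));
    return 0;
}
```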
@@ -608,7 +609,7 @@ void
CopyShardPlacement(ShardPlacement *srcPlacement, ShardPlacement *destPlacement)
{
    /* first copy all by-value fields */
    memcpy(destPlacement, srcPlacement, sizeof(ShardPlacement));
    *destPlacement = *srcPlacement;

    /* and then the fields pointing to external values */
    if (srcPlacement->nodeName)
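Several hunks in this change set replace `memcpy` of whole structs with plain assignment. Assignment is equally efficient but type-checked, so a size mismatch cannot slip in. A minimal illustration with an invented struct:

```c
#include <stdio.h>

typedef struct PlacementLike
{
    long shardId;
    const char *nodeName;
} PlacementLike;

int main(void)
{
    PlacementLike src = { 102008, "worker-1" };
    PlacementLike dst;

    /* struct assignment: the compiler knows both the size and the types... */
    dst = src;

    /* ...while memcpy(&dst, &src, sizeof(SomeOtherStruct)) would compile just
     * as happily and copy the wrong number of bytes. */
    printf("%ld on %s\n", dst.shardId, dst.nodeName);
    return 0;
}
```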
@@ -221,7 +221,6 @@ RepairShardPlacement(int64 shardId, char *sourceNodeName, int32 sourceNodePort,

    char relationKind = get_rel_relkind(distributedTableId);
    char *tableOwner = TableOwner(shardInterval->relationId);
    bool missingOk = false;


    /* prevent table from being dropped */

@@ -319,9 +318,9 @@ RepairShardPlacement(int64 shardId, char *sourceNodeName, int32 sourceNodePort,

    /* after successful repair, we update shard state as healthy */
    List *placementList = ShardPlacementList(shardId);
    ShardPlacement *placement = SearchShardPlacementInList(placementList, targetNodeName,
                                                           targetNodePort,
                                                           missingOk);
    ShardPlacement *placement = ForceSearchShardPlacementInList(placementList,
                                                                targetNodeName,
                                                                targetNodePort);
    UpdateShardPlacementState(placement->placementId, SHARD_STATE_ACTIVE);
}

@@ -375,23 +374,19 @@ EnsureShardCanBeRepaired(int64 shardId, char *sourceNodeName, int32 sourceNodePort,
                         char *targetNodeName, int32 targetNodePort)
{
    List *shardPlacementList = ShardPlacementList(shardId);
    bool missingSourceOk = false;
    bool missingTargetOk = false;

    ShardPlacement *sourcePlacement = SearchShardPlacementInList(shardPlacementList,
                                                                 sourceNodeName,
                                                                 sourceNodePort,
                                                                 missingSourceOk);
    ShardPlacement *sourcePlacement = ForceSearchShardPlacementInList(shardPlacementList,
                                                                      sourceNodeName,
                                                                      sourceNodePort);
    if (sourcePlacement->shardState != SHARD_STATE_ACTIVE)
    {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                        errmsg("source placement must be in active state")));
    }

    ShardPlacement *targetPlacement = SearchShardPlacementInList(shardPlacementList,
                                                                 targetNodeName,
                                                                 targetNodePort,
                                                                 missingTargetOk);
    ShardPlacement *targetPlacement = ForceSearchShardPlacementInList(shardPlacementList,
                                                                      targetNodeName,
                                                                      targetNodePort);
    if (targetPlacement->shardState != SHARD_STATE_INACTIVE)
    {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

@@ -402,15 +397,13 @@ EnsureShardCanBeRepaired(int64 shardId, char *sourceNodeName, int32 sourceNodePort,

/*
 * SearchShardPlacementInList searches a provided list for a shard placement with the
 * specified node name and port. If missingOk is set to true, this function returns NULL
 * if no such placement exists in the provided list, otherwise it throws an error.
 * specified node name and port. This function returns NULL if no such
 * placement exists in the provided list.
 */
ShardPlacement *
SearchShardPlacementInList(List *shardPlacementList, char *nodeName, uint32 nodePort, bool
                           missingOk)
SearchShardPlacementInList(List *shardPlacementList, char *nodeName, uint32 nodePort)
{
    ListCell *shardPlacementCell = NULL;
    ShardPlacement *matchingPlacement = NULL;

    foreach(shardPlacementCell, shardPlacementList)
    {

@@ -419,25 +412,31 @@ SearchShardPlacementInList(List *shardPlacementList, char *nodeName, uint32 nodePort)
        if (strncmp(nodeName, shardPlacement->nodeName, MAX_NODE_LENGTH) == 0 &&
            nodePort == shardPlacement->nodePort)
        {
            matchingPlacement = shardPlacement;
            break;
            return shardPlacement;
        }
    }
    return NULL;
}

    if (matchingPlacement == NULL)

/*
 * ForceSearchShardPlacementInList searches a provided list for a shard
 * placement with the specified node name and port. This function throws an
 * error if no such placement exists in the provided list.
 */
ShardPlacement *
ForceSearchShardPlacementInList(List *shardPlacementList, char *nodeName, uint32 nodePort)
{
    ShardPlacement *placement = SearchShardPlacementInList(shardPlacementList, nodeName,
                                                           nodePort);
    if (placement == NULL)
    {
        if (missingOk)
        {
            return NULL;
        }

        ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
                        errmsg("could not find placement matching \"%s:%d\"",
                               nodeName, nodePort),
                        errhint("Confirm the placement still exists and try again.")));
    }

    return matchingPlacement;
    return placement;
}
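The `Search`/`ForceSearch` split above recurs throughout this change set (`ForceLookupNodeByNodeId`, `ForceFindWorkerNode`, `ForceDistPartitionKey`): the plain function reports "not found" as NULL, and the `Force` wrapper converts NULL into an error, so callers that cannot tolerate a missing entry stay free of boilerplate checks. In miniature, with invented names:

```c
#include <stdio.h>
#include <stdlib.h>

/* Plain lookup: NULL means "not found"; callers decide what that means. */
static const char *
LookupGreeting(int id)
{
    return (id == 1) ? "hello" : NULL;
}

/* Force variant: turns NULL into a hard error at the lookup site. */
static const char *
ForceLookupGreeting(int id)
{
    const char *greeting = LookupGreeting(id);
    if (greeting == NULL)
    {
        fprintf(stderr, "greeting %d could not be found\n", id);
        exit(1);
    }
    return greeting;
}

int main(void)
{
    puts(ForceLookupGreeting(1));
    return 0;
}
```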
@@ -123,7 +123,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
     */
    if (relationKind == RELKIND_FOREIGN_TABLE)
    {
        bool cstoreTable = cstoreTable = CStoreTable(relationId);
        bool cstoreTable = CStoreTable(relationId);
        if (cstoreTable)
        {
            storageType = SHARD_STORAGE_COLUMNAR;
@@ -355,7 +355,7 @@ FilterActiveNodeListFunc(LOCKMODE lockMode, bool (*checkFunction)(WorkerNode *))
        if (workerNode->isActive && checkFunction(workerNode))
        {
            WorkerNode *workerNodeCopy = palloc0(sizeof(WorkerNode));
            memcpy(workerNodeCopy, workerNode, sizeof(WorkerNode));
            *workerNodeCopy = *workerNode;
            workerNodeList = lappend(workerNodeList, workerNodeCopy);
        }
    }
@@ -438,7 +438,7 @@ LoadGroupShardPlacement(uint64 shardId, uint64 placementId)
        {
            GroupShardPlacement *shardPlacement = CitusMakeNode(GroupShardPlacement);

            memcpy(shardPlacement, &placementArray[i], sizeof(GroupShardPlacement));
            *shardPlacement = placementArray[i];

            return shardPlacement;
        }

@@ -513,9 +513,11 @@ ResolveGroupShardPlacement(GroupShardPlacement *groupShardPlacement,
    WorkerNode *workerNode = LookupNodeForGroup(groupId);

    /* copy everything into shardPlacement but preserve the header */
    memcpy((((CitusNode *) shardPlacement) + 1),
           (((CitusNode *) groupShardPlacement) + 1),
           sizeof(GroupShardPlacement) - sizeof(CitusNode));
    CitusNode header = shardPlacement->type;
    GroupShardPlacement *shardPlacementAsGroupPlacement =
        (GroupShardPlacement *) shardPlacement;
    *shardPlacementAsGroupPlacement = *groupShardPlacement;
    shardPlacement->type = header;

    shardPlacement->nodeName = pstrdup(workerNode->workerName);
    shardPlacement->nodePort = workerNode->workerPort;

@@ -561,7 +563,7 @@ LookupNodeByNodeId(uint32 nodeId)
        if (workerNode->nodeId == nodeId)
        {
            WorkerNode *workerNodeCopy = palloc0(sizeof(WorkerNode));
            memcpy(workerNodeCopy, workerNode, sizeof(WorkerNode));
            *workerNodeCopy = *workerNode;

            return workerNodeCopy;
        }

@@ -571,6 +573,22 @@ LookupNodeByNodeId(uint32 nodeId)
}


/*
 * ForceLookupNodeByNodeId returns a worker node by nodeId or errors out if the
 * node cannot be found.
 */
WorkerNode *
ForceLookupNodeByNodeId(uint32 nodeId)
{
    WorkerNode *node = LookupNodeByNodeId(nodeId);
    if (node == NULL)
    {
        ereport(ERROR, (errmsg("node %d could not be found", nodeId)));
    }
    return node;
}


/*
 * LookupNodeForGroup searches the WorkerNodeHash for a worker which is a member of the
 * given group and also readable (a primary if we're reading from primaries, a secondary

@@ -613,20 +631,19 @@ LookupNodeForGroup(int32 groupId)
        {
            ereport(ERROR, (errmsg("node group %d does not have a primary node",
                                   groupId)));
            return NULL;
            break;
        }

        case USE_SECONDARY_NODES_ALWAYS:
        {
            ereport(ERROR, (errmsg("node group %d does not have a secondary node",
                                   groupId)));
            return NULL;
            break;
        }

        default:
        {
            ereport(FATAL, (errmsg("unrecognized value for use_secondary_nodes")));
            return NULL;
        }
    }
}

@@ -1374,12 +1391,12 @@ HasUniformHashDistribution(ShardInterval **shardIntervalArray,
    for (int shardIndex = 0; shardIndex < shardIntervalArrayLength; shardIndex++)
    {
        ShardInterval *shardInterval = shardIntervalArray[shardIndex];
        int32 shardMinHashToken = INT32_MIN + (shardIndex * hashTokenIncrement);
        int32 shardMinHashToken = PG_INT32_MIN + (shardIndex * hashTokenIncrement);
        int32 shardMaxHashToken = shardMinHashToken + (hashTokenIncrement - 1);

        if (shardIndex == (shardIntervalArrayLength - 1))
        {
            shardMaxHashToken = INT32_MAX;
            shardMaxHashToken = PG_INT32_MAX;
        }

        if (DatumGetInt32(shardInterval->minValue) != shardMinHashToken ||

@@ -1785,8 +1802,6 @@ AvailableExtensionVersion(void)

    ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                    errmsg("citus extension is not found")));

    return NULL;
}

@@ -2309,7 +2324,7 @@ CurrentDatabaseName(void)
    char *databaseName = get_database_name(MyDatabaseId);
    if (databaseName == NULL)
    {
        return NULL;
        ereport(ERROR, (errmsg("database that is connected to does not exist")));
    }

    strlcpy(MetadataCache.databaseName, databaseName, NAMEDATALEN);
@@ -2405,13 +2420,13 @@ CurrentUserName(void)


/*
 * LookupTypeOid returns the Oid of the "pg_catalog.{typeNameString}" type, or
 * LookupTypeOid returns the Oid of the "{schemaNameSting}.{typeNameString}" type, or
 * InvalidOid if it does not exist.
 */
static Oid
LookupTypeOid(char *typeNameString)
Oid
LookupTypeOid(char *schemaNameSting, char *typeNameString)
{
    Value *schemaName = makeString("pg_catalog");
    Value *schemaName = makeString(schemaNameSting);
    Value *typeName = makeString(typeNameString);
    List *qualifiedName = list_make2(schemaName, typeName);
    TypeName *enumTypeName = makeTypeNameFromNameList(qualifiedName);

@@ -2443,7 +2458,7 @@ LookupTypeOid(char *typeNameString)
static Oid
LookupStringEnumValueId(char *enumName, char *valueName)
{
    Oid enumTypeId = LookupTypeOid(enumName);
    Oid enumTypeId = LookupTypeOid("pg_catalog", enumName);

    if (enumTypeId == InvalidOid)
    {

@@ -3597,7 +3612,7 @@ LookupDistPartitionTuple(Relation pgDistPartition, Oid relationId)
    ScanKeyData scanKey[1];

    /* copy scankey to local copy, it will be modified during the scan */
    memcpy(scanKey, DistPartitionScanKey, sizeof(DistPartitionScanKey));
    scanKey[0] = DistPartitionScanKey[0];

    /* set scan arguments */
    scanKey[0].sk_argument = ObjectIdGetDatum(relationId);

@@ -3631,7 +3646,7 @@ LookupDistShardTuples(Oid relationId)
    Relation pgDistShard = heap_open(DistShardRelationId(), AccessShareLock);

    /* copy scankey to local copy, it will be modified during the scan */
    memcpy(scanKey, DistShardScanKey, sizeof(DistShardScanKey));
    scanKey[0] = DistShardScanKey[0];

    /* set scan arguments */
    scanKey[0].sk_argument = ObjectIdGetDatum(relationId);
@@ -21,6 +21,7 @@
#include "catalog/namespace.h"
#include "commands/sequence.h"
#include "distributed/citus_acquire_lock.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/colocation_utils.h"
#include "distributed/commands.h"
#include "distributed/commands/utility_hook.h"

@@ -109,11 +110,14 @@ PG_FUNCTION_INFO_V1(get_shard_id_for_distribution_column);
static NodeMetadata
DefaultNodeMetadata()
{
    NodeMetadata nodeMetadata = {
        .nodeRack = WORKER_DEFAULT_RACK,
        .shouldHaveShards = true,
        .groupId = INVALID_GROUP_ID,
    };
    NodeMetadata nodeMetadata;

    /* ensure uninitialized padding doesn't escape the function */
    memset_struct_0(nodeMetadata);
    nodeMetadata.nodeRack = WORKER_DEFAULT_RACK;
    nodeMetadata.shouldHaveShards = true;
    nodeMetadata.groupId = INVALID_GROUP_ID;

    return nodeMetadata;
}
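The designated-initializer version zeroes every named member, but the C standard leaves the struct's padding bytes unspecified, which matters if the struct is ever hashed or compared bytewise, and it trips memory sanitizers. `memset_struct_0` is assumed here to expand to `memset(&var, 0, sizeof(var))`, matching its use above. A standalone illustration:

```c
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Assumed expansion, mirroring the macro's use in the hunk above. */
#define memset_struct_0(variable) memset(&(variable), 0, sizeof(variable))

typedef struct Metadata
{
    char rack;    /* on typical 64-bit targets, 7 padding bytes follow */
    long groupId;
} Metadata;

int main(void)
{
    Metadata m;
    memset_struct_0(m); /* zeroes the members *and* the padding */
    m.rack = 'a';
    m.groupId = 42;

    /* With `Metadata m = { .rack = 'a', .groupId = 42 };` the padding bytes
     * would be unspecified, so hashing or memcmp()-ing the struct could
     * observe stale stack bytes. */
    const unsigned char *bytes = (const unsigned char *) &m;
    for (size_t i = 0; i < sizeof(m); i++)
    {
        printf("%02x ", bytes[i]);
    }
    printf("\n");
    return 0;
}
```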
@@ -827,7 +831,7 @@ get_shard_id_for_distribution_column(PG_FUNCTION_ARGS)
    Oid inputDataType = get_fn_expr_argtype(fcinfo->flinfo, 1);
    char *distributionValueString = DatumToString(inputDatum, inputDataType);

    Var *distributionColumn = DistPartitionKey(relationId);
    Var *distributionColumn = ForceDistPartitionKey(relationId);
    Oid distributionDataType = distributionColumn->vartype;

    Datum distributionValueDatum = StringToDatum(distributionValueString,

@@ -873,7 +877,7 @@ FindWorkerNode(char *nodeName, int32 nodePort)
    if (handleFound)
    {
        WorkerNode *workerNode = (WorkerNode *) palloc(sizeof(WorkerNode));
        memcpy(workerNode, cachedWorkerNode, sizeof(WorkerNode));
        *workerNode = *cachedWorkerNode;
        return workerNode;
    }

@@ -881,6 +885,23 @@ FindWorkerNode(char *nodeName, int32 nodePort)
}


/*
 * ForceFindWorkerNode searches over the worker nodes and returns the workerNode
 * if it exists, otherwise it errors out.
 */
WorkerNode *
ForceFindWorkerNode(char *nodeName, int32 nodePort)
{
    WorkerNode *node = FindWorkerNode(nodeName, nodePort);
    if (node == NULL)
    {
        ereport(ERROR, (errcode(ERRCODE_NO_DATA_FOUND),
                        errmsg("node %s:%d not found", nodeName, nodePort)));
    }
    return node;
}


/*
 * FindWorkerNodeAnyCluster returns the workerNode no matter which cluster it is a part
 * of. FindWorkerNodes, like almost every other function, acts as if nodes in other
@@ -17,6 +17,7 @@
#include "distributed/citus_ruleutils.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/insert_select_planner.h"
#include "distributed/listutils.h"
#include "distributed/local_executor.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_physical_planner.h"

@@ -46,18 +47,20 @@ static char * DeparseTaskQuery(Task *task, Query *query);
 * include execution-time changes such as function evaluation.
 */
void
RebuildQueryStrings(Query *originalQuery, List *taskList)
RebuildQueryStrings(Job *workerJob)
{
    ListCell *taskCell = NULL;
    Query *originalQuery = workerJob->jobQuery;
    List *taskList = workerJob->taskList;
    Oid relationId = ((RangeTblEntry *) linitial(originalQuery->rtable))->relid;
    RangeTblEntry *valuesRTE = ExtractDistributedInsertValuesRTE(originalQuery);

    foreach(taskCell, taskList)
    Task *task = NULL;

    foreach_ptr(task, taskList)
    {
        Task *task = (Task *) lfirst(taskCell);
        Query *query = originalQuery;

        if (UpdateOrDeleteQuery(query) && list_length(taskList))
        if (UpdateOrDeleteQuery(query) && list_length(taskList) > 1)
        {
            query = copyObject(originalQuery);
        }

@@ -115,6 +118,12 @@ RebuildQueryStrings(Query *originalQuery, List *taskList)

        UpdateTaskQueryString(query, relationId, valuesRTE, task);

        /*
         * If parameters were resolved in the job query, then they are now also
         * resolved in the query string.
         */
        task->parametersInQueryStringResolved = workerJob->parametersInJobQueryResolved;

        ereport(DEBUG4, (errmsg("query after rebuilding: %s",
                                ApplyLogRedaction(TaskQueryString(task)))));
    }
@@ -976,6 +976,7 @@ CreateDistributedPlan(uint64 planId, Query *originalQuery, Query *query, ParamListInfo boundParams,
     */
    originalQuery = (Query *) ResolveExternalParams((Node *) originalQuery,
                                                    boundParams);
    Assert(originalQuery != NULL);

    /*
     * Plan subqueries and CTEs that cannot be pushed down by recursively

@@ -1025,11 +1026,14 @@ CreateDistributedPlan(uint64 planId, Query *originalQuery, Query *query, ParamListInfo boundParams,
        standard_planner(newQuery, 0, boundParams);

    /* overwrite the old transformed query with the new transformed query */
    memcpy(query, newQuery, sizeof(Query));
    *query = *newQuery;

    /* recurse into CreateDistributedPlan with subqueries/CTEs replaced */
    distributedPlan = CreateDistributedPlan(planId, originalQuery, query, NULL, false,
                                            plannerRestrictionContext);

    /* distributedPlan cannot be null since hasUnresolvedParams argument was false */
    Assert(distributedPlan != NULL);
    distributedPlan->subPlanList = subPlanList;

    FinalizeDistributedPlan(distributedPlan, originalQuery);

@@ -1700,8 +1704,7 @@ CheckNodeCopyAndSerialization(Node *node)
{
#ifdef USE_ASSERT_CHECKING
    char *out = nodeToString(node);
    Node *deserializedNode = (Node *) stringToNode(out);
    Node *nodeCopy = copyObject(deserializedNode);
    Node *nodeCopy = copyObject(node);
    char *outCopy = nodeToString(nodeCopy);

    pfree(out);
@@ -356,26 +356,17 @@ ConjunctionContainsColumnFilter(Node *node, Var *column, Node **distributionKeyValue)
static bool
DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn, Node **distributionKeyValue)
{
    Node *leftOperand = NULL;
    Node *rightOperand = NULL;
    Param *paramClause = NULL;
    Const *constantClause = NULL;

    Var *columnInExpr = NULL;

    if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
    Node *leftOperand;
    Node *rightOperand;
    if (!BinaryOpExpression(clause, &leftOperand, &rightOperand))
    {
        leftOperand = get_leftop(clause);
        rightOperand = get_rightop(clause);
        return false;
    }
    else
    {
        return false; /* not a binary opclause */
    }

    /* strip coercions before doing check */
    leftOperand = strip_implicit_coercions(leftOperand);
    rightOperand = strip_implicit_coercions(rightOperand);

    if (IsA(rightOperand, Param) && IsA(leftOperand, Var))
    {
@@ -106,7 +106,7 @@ RecordSubplanExecutionsOnNodes(HTAB *intermediateResultsHash,
    List *usedSubPlanNodeList = distributedPlan->usedSubPlanNodeList;
    List *subPlanList = distributedPlan->subPlanList;
    ListCell *subPlanCell = NULL;
    int workerNodeCount = GetWorkerNodeCount();
    int workerNodeCount = ActiveReadableWorkerNodeCount();

    foreach(subPlanCell, usedSubPlanNodeList)
    {

@@ -124,13 +124,14 @@ RecordSubplanExecutionsOnNodes(HTAB *intermediateResultsHash,
        }

        /*
         * There is no need to traverse the whole plan if the intermediate result
         * will be written to a local file and send to all nodes
         * There is no need to traverse the subplan if the intermediate result
         * will be written to a local file and sent to all nodes. Note that the
         * remaining subplans in the distributed plan should still be traversed.
         */
        if (list_length(entry->nodeIdList) == workerNodeCount && entry->writeLocalFile)
        {
            elog(DEBUG4, "Subplan %s is used in all workers", resultId);
            break;
            continue;
        }
        else if (usedPlan->locationMask & SUBPLAN_ACCESS_REMOTE)
        {

@@ -139,7 +140,7 @@ RecordSubplanExecutionsOnNodes(HTAB *intermediateResultsHash,
             *
             * If we have reference tables in the distributed plan, all the
             * workers will be in the node list. We can improve intermediate result
             * pruning by deciding which reference table shard will be accessed earlier
             * pruning by deciding which reference table shard will be accessed earlier.
             */
            AppendAllAccessedWorkerNodes(entry, distributedPlan, workerNodeCount);
@@ -1392,6 +1392,26 @@ DistPartitionKey(Oid relationId)
}


/*
 * ForceDistPartitionKey is the same as DistPartitionKey but errors out instead
 * of returning NULL if this is called with a relationId of a reference table.
 */
Var *
ForceDistPartitionKey(Oid relationId)
{
    Var *partitionKey = DistPartitionKey(relationId);

    if (partitionKey == NULL)
    {
        ereport(ERROR, (errmsg(
                            "no distribution column found for relation %d, because it is a reference table",
                            relationId)));
    }

    return partitionKey;
}


/* Returns the partition method for the given relation. */
char
PartitionMethod(Oid relationId)
@@ -36,6 +36,7 @@
#include "distributed/multi_logical_planner.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/pg_dist_partition.h"
#include "distributed/tdigest_extension.h"
#include "distributed/worker_protocol.h"
#include "distributed/version_compat.h"
#include "nodes/makefuncs.h"

@@ -52,11 +53,14 @@
#include "parser/parse_coerce.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"

#define StartsWith(msg, prefix) \
    (strncmp(msg, prefix, strlen(prefix)) == 0)

/* Config variable managed via guc.c */
int LimitClauseRowFetchCount = -1; /* number of rows to fetch from each task */

@@ -1434,7 +1438,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
        Expr *originalExpression = originalTargetEntry->expr;
        Expr *newExpression = NULL;

        bool hasAggregates = contain_agg_clause((Node *) originalExpression);
        bool hasAggregates = contain_aggs_of_level((Node *) originalExpression, 0);
        bool hasWindowFunction = contain_window_function((Node *) originalExpression);

        /*
@@ -1934,6 +1938,131 @@ MasterAggregateExpression(Aggref *originalAggregate,

        newMasterExpression = (Expr *) unionAggregate;
    }
    else if (aggregateType == AGGREGATE_TDIGEST_COMBINE ||
             aggregateType == AGGREGATE_TDIGEST_ADD_DOUBLE)
    {
        /* tdigest of column */
        Oid tdigestType = TDigestExtensionTypeOid(); /* tdigest type */
        Oid unionFunctionId = TDigestExtensionAggTDigest1();

        int32 tdigestReturnTypeMod = exprTypmod((Node *) originalAggregate);
        Oid tdigestTypeCollationId = exprCollation((Node *) originalAggregate);

        /* create first argument for tdigest_percentile(tdigest, double) */
        Var *tdigestColumn = makeVar(masterTableId, walkerContext->columnId, tdigestType,
                                     tdigestReturnTypeMod, tdigestTypeCollationId,
                                     columnLevelsUp);
        TargetEntry *tdigestTargetEntry = makeTargetEntry((Expr *) tdigestColumn,
                                                          argumentId,
                                                          NULL, false);
        walkerContext->columnId++;

        /* construct the master tdigest(tdigest) expression */
        Aggref *unionAggregate = makeNode(Aggref);
        unionAggregate->aggfnoid = unionFunctionId;
        unionAggregate->aggtype = originalAggregate->aggtype;
        unionAggregate->args = list_make1(tdigestTargetEntry);
        unionAggregate->aggkind = AGGKIND_NORMAL;
        unionAggregate->aggfilter = NULL;
        unionAggregate->aggtranstype = InvalidOid;
        unionAggregate->aggargtypes = list_make1_oid(tdigestType);
        unionAggregate->aggsplit = AGGSPLIT_SIMPLE;

        newMasterExpression = (Expr *) unionAggregate;
    }
    else if (aggregateType == AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLE ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLEARRAY ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLE ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLEARRAY)
    {
        /* tdigest of column */
        Oid tdigestType = TDigestExtensionTypeOid();
        Oid unionFunctionId = InvalidOid;
        if (aggregateType == AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLE)
        {
            unionFunctionId = TDigestExtensionAggTDigestPercentile2();
        }
        else if (aggregateType == AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLEARRAY)
        {
            unionFunctionId = TDigestExtensionAggTDigestPercentile2a();
        }
        else if (aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLE)
        {
            unionFunctionId = TDigestExtensionAggTDigestPercentileOf2();
        }
        else if (aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLEARRAY)
        {
            unionFunctionId = TDigestExtensionAggTDigestPercentileOf2a();
        }
        Assert(OidIsValid(unionFunctionId));

        int32 tdigestReturnTypeMod = exprTypmod((Node *) originalAggregate);
        Oid tdigestTypeCollationId = exprCollation((Node *) originalAggregate);

        /* create first argument for tdigest_percentile(tdigest, double) */
        Var *tdigestColumn = makeVar(masterTableId, walkerContext->columnId, tdigestType,
                                     tdigestReturnTypeMod, tdigestTypeCollationId,
                                     columnLevelsUp);
        TargetEntry *tdigestTargetEntry = makeTargetEntry((Expr *) tdigestColumn,
                                                          argumentId, NULL, false);
        walkerContext->columnId++;

        /* construct the master tdigest_percentile(tdigest, double) expression */
        Aggref *unionAggregate = makeNode(Aggref);
        unionAggregate->aggfnoid = unionFunctionId;
        unionAggregate->aggtype = originalAggregate->aggtype;
        unionAggregate->args = list_make2(
            tdigestTargetEntry,
            list_nth(originalAggregate->args, 2));
        unionAggregate->aggkind = AGGKIND_NORMAL;
        unionAggregate->aggfilter = NULL;
        unionAggregate->aggtranstype = InvalidOid;
        unionAggregate->aggargtypes = list_make2_oid(
            tdigestType,
            list_nth_oid(originalAggregate->aggargtypes, 2));
        unionAggregate->aggsplit = AGGSPLIT_SIMPLE;

        newMasterExpression = (Expr *) unionAggregate;
    }
    else if (aggregateType == AGGREGATE_TDIGEST_PERCENTILE_TDIGEST_DOUBLE ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_TDIGEST_DOUBLEARRAY ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_TDIGEST_DOUBLE ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_TDIGEST_DOUBLEARRAY)
    {
        /* tdigest of column */
        Oid tdigestType = TDigestExtensionTypeOid();

        /* These functions already will combine the tdigest arguments returned */
        Oid unionFunctionId = originalAggregate->aggfnoid;

        int32 tdigestReturnTypeMod = exprTypmod((Node *) originalAggregate);
        Oid tdigestTypeCollationId = exprCollation((Node *) originalAggregate);

        /* create first argument for tdigest_percentile(tdigest, double) */
        Var *tdigestColumn = makeVar(masterTableId, walkerContext->columnId, tdigestType,
                                     tdigestReturnTypeMod, tdigestTypeCollationId,
                                     columnLevelsUp);
        TargetEntry *tdigestTargetEntry = makeTargetEntry((Expr *) tdigestColumn,
                                                          argumentId, NULL, false);
        walkerContext->columnId++;

        /* construct the master tdigest_percentile(tdigest, double) expression */
        Aggref *unionAggregate = makeNode(Aggref);
        unionAggregate->aggfnoid = unionFunctionId;
        unionAggregate->aggtype = originalAggregate->aggtype;
        unionAggregate->args = list_make2(
            tdigestTargetEntry,
            list_nth(originalAggregate->args, 1));
        unionAggregate->aggkind = AGGKIND_NORMAL;
        unionAggregate->aggfilter = NULL;
        unionAggregate->aggtranstype = InvalidOid;
        unionAggregate->aggargtypes = list_make2_oid(
            tdigestType,
            list_nth_oid(originalAggregate->aggargtypes, 1));
        unionAggregate->aggsplit = AGGSPLIT_SIMPLE;

        newMasterExpression = (Expr *) unionAggregate;
    }
    else if (aggregateType == AGGREGATE_CUSTOM_COMBINE)
    {
        HeapTuple aggTuple =
@@ -3047,6 +3176,71 @@ WorkerAggregateExpressionList(Aggref *originalAggregate,
        workerAggregateList = lappend(workerAggregateList, sumAggregate);
        workerAggregateList = lappend(workerAggregateList, countAggregate);
    }
    else if (aggregateType == AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLE ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLEARRAY ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLE ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLEARRAY)
    {
        /*
         * The original query has an aggregate in the form of either
         *  - tdigest_percentile(column, compression, quantile)
         *  - tdigest_percentile(column, compression, quantile[])
         *  - tdigest_percentile_of(column, compression, value)
         *  - tdigest_percentile_of(column, compression, value[])
         *
         * We are creating the worker part of this query by creating a
         *  - tdigest(column, compression)
         *
         * One could see we are passing argument 0 and argument 1 from the original query
         * in here. This corresponds with the list_nth calls in the args and aggargtypes
         * list construction. The tdigest function and type are read from the catalog.
         */
        Aggref *newWorkerAggregate = copyObject(originalAggregate);
        newWorkerAggregate->aggfnoid = TDigestExtensionAggTDigest2();
        newWorkerAggregate->aggtype = TDigestExtensionTypeOid();
        newWorkerAggregate->args = list_make2(
            list_nth(newWorkerAggregate->args, 0),
            list_nth(newWorkerAggregate->args, 1));
        newWorkerAggregate->aggkind = AGGKIND_NORMAL;
        newWorkerAggregate->aggtranstype = InvalidOid;
        newWorkerAggregate->aggargtypes = list_make2_oid(
            list_nth_oid(newWorkerAggregate->aggargtypes, 0),
            list_nth_oid(newWorkerAggregate->aggargtypes, 1));
        newWorkerAggregate->aggsplit = AGGSPLIT_SIMPLE;

        workerAggregateList = lappend(workerAggregateList, newWorkerAggregate);
    }
    else if (aggregateType == AGGREGATE_TDIGEST_PERCENTILE_TDIGEST_DOUBLE ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_TDIGEST_DOUBLEARRAY ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_TDIGEST_DOUBLE ||
             aggregateType == AGGREGATE_TDIGEST_PERCENTILE_OF_TDIGEST_DOUBLEARRAY)
    {
        /*
         * The original query has an aggregate in the form of either
         *  - tdigest_percentile(tdigest, quantile)
         *  - tdigest_percentile(tdigest, quantile[])
         *  - tdigest_percentile_of(tdigest, value)
         *  - tdigest_percentile_of(tdigest, value[])
         *
         * We are creating the worker part of this query by creating a
         *  - tdigest(tdigest)
         *
         * One could see we are passing argument 0 from the original query in here. This
         * corresponds with the list_nth calls in the args and aggargtypes list
         * construction. The tdigest function and type are read from the catalog.
         */
        Aggref *newWorkerAggregate = copyObject(originalAggregate);
        newWorkerAggregate->aggfnoid = TDigestExtensionAggTDigest1();
        newWorkerAggregate->aggtype = TDigestExtensionTypeOid();
        newWorkerAggregate->args = list_make1(list_nth(newWorkerAggregate->args, 0));
        newWorkerAggregate->aggkind = AGGKIND_NORMAL;
        newWorkerAggregate->aggtranstype = InvalidOid;
        newWorkerAggregate->aggargtypes = list_make1_oid(
            list_nth_oid(newWorkerAggregate->aggargtypes, 0));
        newWorkerAggregate->aggsplit = AGGSPLIT_SIMPLE;

        workerAggregateList = lappend(workerAggregateList, newWorkerAggregate);
    }
    else if (aggregateType == AGGREGATE_CUSTOM_COMBINE)
    {
        HeapTuple aggTuple =
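The worker/master split these tdigest branches implement is the standard two-phase aggregation scheme: each worker computes a partial state over its shard, and the coordinator combines the partials, exactly as the comments above describe for `tdigest_percentile(column, compression, quantile)` becoming `tdigest(column, compression)` on the workers. A self-contained sketch with `avg()` standing in for tdigest (the tdigest state is a sketch structure rather than a sum/count pair, but the shape of the computation is the same):

```c
#include <stdio.h>

/* Partial state produced by each worker for avg(); tdigest would carry a
 * quantile sketch here instead. */
typedef struct AvgPartial
{
    double sum;
    long count;
} AvgPartial;

/* Worker phase: aggregate the rows of one shard into a partial state. */
static AvgPartial
WorkerPhase(const double *values, int n)
{
    AvgPartial p = { 0.0, 0 };
    for (int i = 0; i < n; i++)
    {
        p.sum += values[i];
        p.count++;
    }
    return p;
}

/* Coordinator phase: combine the partials and compute the final value. */
static double
CoordinatorPhase(const AvgPartial *partials, int nWorkers)
{
    double sum = 0.0;
    long count = 0;
    for (int i = 0; i < nWorkers; i++)
    {
        sum += partials[i].sum;
        count += partials[i].count;
    }
    return sum / (double) count;
}

int main(void)
{
    double shard1[] = { 1, 2, 3 };
    double shard2[] = { 4, 5 };
    AvgPartial partials[2] = { WorkerPhase(shard1, 3), WorkerPhase(shard2, 2) };

    printf("avg = %g\n", CoordinatorPhase(partials, 2)); /* avg = 3 */
    return 0;
}
```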
@ -3154,6 +3348,66 @@ GetAggregateType(Aggref *aggregateExpression)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* All functions from github.com/tvondra/tdigest start with the "tdigest" prefix.
|
||||
* Since it requires lookups of function names in a schema we would like to only
|
||||
* perform these checks if there is some chance it will actually result in a positive
|
||||
* hit.
|
||||
*/
|
||||
if (StartsWith(aggregateProcName, "tdigest"))
|
||||
{
|
||||
if (aggFunctionId == TDigestExtensionAggTDigest1())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_COMBINE;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigest2())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_ADD_DOUBLE;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigestPercentile3())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLE;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigestPercentile3a())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLEARRAY;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigestPercentile2())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_PERCENTILE_TDIGEST_DOUBLE;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigestPercentile2a())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_PERCENTILE_TDIGEST_DOUBLEARRAY;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigestPercentileOf3())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLE;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigestPercentileOf3a())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLEARRAY;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigestPercentileOf2())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_PERCENTILE_OF_TDIGEST_DOUBLE;
|
||||
}
|
||||
|
||||
if (aggFunctionId == TDigestExtensionAggTDigestPercentileOf2a())
|
||||
{
|
||||
return AGGREGATE_TDIGEST_PERCENTILE_OF_TDIGEST_DOUBLEARRAY;
|
||||
}
|
||||
}
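StartsWith is used above but defined elsewhere in the patch; a minimal sketch of the prefix test, assuming plain NUL-terminated C strings, would be:

	static bool
	StartsWith(const char *string, const char *prefix)
	{
		/* true when string begins with prefix */
		return strncmp(string, prefix, strlen(prefix)) == 0;
	}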

	if (AggregateEnabledCustom(aggregateExpression))
	{
		return AGGREGATE_CUSTOM_COMBINE;

@@ -3495,6 +3749,7 @@ RequiresIntermediateRowPullUp(MultiNode *logicalPlanNode)
	MultiExtendedOp *extendedOpNode = (MultiExtendedOp *) linitial(opNodeList);

	List *targetList = extendedOpNode->targetList;
	Node *havingQual = extendedOpNode->havingQual;

	/*
	 * PVC_REJECT_PLACEHOLDERS is implicit if PVC_INCLUDE_PLACEHOLDERS isn't
@@ -3502,6 +3757,8 @@ RequiresIntermediateRowPullUp(MultiNode *logicalPlanNode)
	 */
	List *expressionList = pull_var_clause((Node *) targetList, PVC_INCLUDE_AGGREGATES |
										   PVC_INCLUDE_WINDOWFUNCS);
	expressionList = list_concat(expressionList,
								 pull_var_clause(havingQual, PVC_INCLUDE_AGGREGATES));

	Node *expression = NULL;
	foreach_ptr(expression, expressionList)

@@ -3538,6 +3795,7 @@ DeferErrorIfContainsNonPushdownableAggregate(MultiNode *logicalPlanNode)
	MultiExtendedOp *extendedOpNode = (MultiExtendedOp *) linitial(opNodeList);

	List *targetList = extendedOpNode->targetList;
	Node *havingQual = extendedOpNode->havingQual;

	/*
	 * PVC_REJECT_PLACEHOLDERS is implicit if PVC_INCLUDE_PLACEHOLDERS isn't
@@ -3545,6 +3803,8 @@ DeferErrorIfContainsNonPushdownableAggregate(MultiNode *logicalPlanNode)
	 */
	List *expressionList = pull_var_clause((Node *) targetList, PVC_INCLUDE_AGGREGATES |
										   PVC_INCLUDE_WINDOWFUNCS);
	expressionList = list_concat(expressionList,
								 pull_var_clause(havingQual, PVC_INCLUDE_AGGREGATES));

	ListCell *expressionCell = NULL;
	foreach(expressionCell, expressionList)

@@ -4390,7 +4650,7 @@ GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
	SortGroupClause *sgClause = (SortGroupClause *) lfirst(sortClauseCell);
	TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList);
	Expr *targetExpr = targetEntry->expr;
	bool containsAggregate = contain_agg_clause((Node *) targetExpr);
	bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0);
	bool createNewTargetEntry = false;

	/* we are only interested in target entries containing aggregates */

@@ -4492,7 +4752,7 @@ HasOrderByAggregate(List *sortClauseList, List *targetList)
	SortGroupClause *sortClause = (SortGroupClause *) lfirst(sortClauseCell);
	Node *sortExpression = get_sortgroupclause_expr(sortClause, targetList);

	bool containsAggregate = contain_agg_clause(sortExpression);
	bool containsAggregate = contain_aggs_of_level(sortExpression, 0);
	if (containsAggregate)
	{
		hasOrderByAggregate = true;

@@ -4566,7 +4826,7 @@ HasOrderByComplexExpression(List *sortClauseList, List *targetList)
		continue;
	}

	bool nestedAggregate = contain_agg_clause(sortExpression);
	bool nestedAggregate = contain_aggs_of_level(sortExpression, 0);
	if (nestedAggregate)
	{
		hasOrderByComplexExpression = true;
@@ -72,6 +72,7 @@ static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join

/* Local functions forward declarations */
static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
static Oid NodeTryGetRteRelid(Node *node);
static bool FullCompositeFieldList(List *compositeFieldList);
static bool HasUnsupportedJoinWalker(Node *node, void *context);
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);

@@ -412,37 +413,55 @@ QueryContainsDistributedTableRTE(Query *query)


/*
 * IsDistributedTableRTE gets a node and returns true if the node
 * is a range table relation entry that points to a distributed
 * relation (i.e., excluding reference tables).
 * NodeTryGetRteRelid returns the relid of the given RTE_RELATION RangeTableEntry.
 * Returns InvalidOid if any of these assumptions fail for the given node.
 */
bool
IsDistributedTableRTE(Node *node)
static Oid
NodeTryGetRteRelid(Node *node)
{
	if (node == NULL)
	{
		return false;
		return InvalidOid;
	}

	if (!IsA(node, RangeTblEntry))
	{
		return false;
		return InvalidOid;
	}

	RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node;
	if (rangeTableEntry->rtekind != RTE_RELATION)
	{
		return false;
		return InvalidOid;
	}

	Oid relationId = rangeTableEntry->relid;
	if (!IsDistributedTable(relationId) ||
		PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
	{
		return false;
	}
	return rangeTableEntry->relid;
}

	return true;

/*
 * IsCitusTableRTE gets a node and returns true if the node is a
 * range table relation entry that points to a distributed relation.
 */
bool
IsCitusTableRTE(Node *node)
{
	Oid relationId = NodeTryGetRteRelid(node);
	return relationId != InvalidOid && IsDistributedTable(relationId);
}


/*
 * IsDistributedTableRTE gets a node and returns true if the node
 * is a range table relation entry that points to a distributed relation,
 * returning false still if the relation is a reference table.
 */
bool
IsDistributedTableRTE(Node *node)
{
	Oid relationId = NodeTryGetRteRelid(node);
	return relationId != InvalidOid && IsDistributedTable(relationId) &&
		   PartitionMethod(relationId) != DISTRIBUTE_BY_NONE;
}
@@ -68,6 +68,7 @@
#include "optimizer/tlist.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
#include "utils/fmgroids.h"

@@ -139,6 +140,7 @@ static ArrayType * SplitPointObject(ShardInterval **shardIntervalArray,
static bool DistributedPlanRouterExecutable(DistributedPlan *distributedPlan);
static Job * BuildJobTreeTaskList(Job *jobTree,
								  PlannerRestrictionContext *plannerRestrictionContext);
static bool IsInnerTableOfOuterJoin(RelationRestriction *relationRestriction);
static void ErrorIfUnsupportedShardDistribution(Query *query);
static Task * QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
									  RelationRestrictionContext *restrictionContext,

@@ -715,7 +717,8 @@ BuildJobQuery(MultiNode *multiNode, List *dependentJobList)
	jobQuery->limitOffset = limitOffset;
	jobQuery->limitCount = limitCount;
	jobQuery->havingQual = havingQual;
	jobQuery->hasAggs = contain_agg_clause((Node *) targetList);
	jobQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0) ||
						contain_aggs_of_level((Node *) havingQual, 0);
	jobQuery->distinctClause = distinctClause;
	jobQuery->hasDistinctOn = hasDistinctOn;

@@ -799,7 +802,7 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependentJobList)
	reduceQuery->limitOffset = extendedOpNode->limitOffset;
	reduceQuery->limitCount = extendedOpNode->limitCount;
	reduceQuery->havingQual = extendedOpNode->havingQual;
	reduceQuery->hasAggs = contain_agg_clause((Node *) targetList);
	reduceQuery->hasAggs = contain_aggs_of_level((Node *) targetList, 0);

	return reduceQuery;
}

@@ -1516,8 +1519,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
	/* build the where clause list using select predicates */
	List *whereClauseList = QuerySelectClauseList(multiNode);

	if (contain_agg_clause((Node *) targetList) ||
		contain_agg_clause((Node *) havingQual))
	if (contain_aggs_of_level((Node *) targetList, 0) ||
		contain_aggs_of_level((Node *) havingQual, 0))
	{
		hasAggregates = true;
	}

@@ -2163,6 +2166,21 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
		maxShardOffset = -1;
	}

	/*
	 * For left joins we don't care about the shards pruned for the right hand side.
	 * If the right hand side would prune to a smaller set we should still send it to
	 * all tables of the left hand side. However, if the right hand side is bigger than
	 * the left hand side we don't have to send the query to any shard that is not
	 * matching anything on the left hand side.
	 *
	 * Instead we will simply skip any RelationRestriction if it is an OUTER join and
	 * the table is part of the non-outer side of the join.
	 */
	if (IsInnerTableOfOuterJoin(relationRestriction))
	{
		continue;
	}

	foreach(shardIntervalCell, prunedShardList)
	{
		ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell);

@@ -2170,15 +2188,8 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,

		taskRequiredForShardIndex[shardIndex] = true;

		if (shardIndex < minShardOffset)
		{
			minShardOffset = shardIndex;
		}

		if (shardIndex > maxShardOffset)
		{
			maxShardOffset = shardIndex;
		}
		minShardOffset = Min(minShardOffset, shardIndex);
		maxShardOffset = Max(maxShardOffset, shardIndex);
	}
}

@@ -2228,6 +2239,45 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
}


/*
 * IsInnerTableOfOuterJoin tests, based on the join information encoded in a
 * RelationRestriction, whether the table accessed for this relation is
 * a) in an outer join
 * b) on the inner part of said join
 *
 * The function returns true only if both conditions above hold.
 */
static bool
IsInnerTableOfOuterJoin(RelationRestriction *relationRestriction)
{
	RestrictInfo *joinInfo = NULL;
	foreach_ptr(joinInfo, relationRestriction->relOptInfo->joininfo)
	{
		if (joinInfo->outer_relids == NULL)
		{
			/* not an outer join */
			continue;
		}

		/*
		 * This join restriction info describes an outer join; we need to figure out if
		 * our table is in the non-outer part of this join. If that is the case, this is
		 * an inner table of an outer join.
		 */
		bool isInOuter = bms_is_member(relationRestriction->relOptInfo->relid,
									   joinInfo->outer_relids);
		if (!isInOuter)
		{
			/* this table is joined in the inner part of an outer join */
			return true;
		}
	}

	/* we have not found any join clause that satisfies both requirements */
	return false;
}
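A rough editor's illustration of the bitmapset test above, with made-up range table indexes (not part of the patch): rti 1 forms the outer side of the join, rti 2 does not.

	Relids outerRelids = bms_make_singleton(1);
	bool rti2IsOuter = bms_is_member(2, outerRelids);	/* false */
	/* rti 2 therefore sits on the inner side of the outer join */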

/*
 * ErrorIfUnsupportedShardDistribution gets the list of relations in the given query
 * and checks if two conditions below hold for them, otherwise it errors out.

@@ -3202,6 +3252,43 @@ GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber)
}


/*
 * BinaryOpExpression checks whether a given expression is a binary operator. If
 * this is the case it returns true and sets leftOperand and rightOperand to
 * the left and right hand side of the operator. left/rightOperand will be
 * stripped of implicit coercions by strip_implicit_coercions.
 */
bool
BinaryOpExpression(Expr *clause, Node **leftOperand, Node **rightOperand)
{
	if (!is_opclause(clause) || list_length(((OpExpr *) clause)->args) != 2)
	{
		if (leftOperand != NULL)
		{
			*leftOperand = NULL;
		}
		if (rightOperand != NULL)
		{
			*rightOperand = NULL;
		}
		return false;
	}
	if (leftOperand != NULL)
	{
		*leftOperand = get_leftop(clause);
		Assert(*leftOperand != NULL);
		*leftOperand = strip_implicit_coercions(*leftOperand);
	}
	if (rightOperand != NULL)
	{
		*rightOperand = get_rightop(clause);
		Assert(*rightOperand != NULL);
		*rightOperand = strip_implicit_coercions(*rightOperand);
	}
	return true;
}
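A hedged usage sketch of the helper above (someClause is a placeholder for an expression obtained elsewhere): how a caller can pull a Var/Const pair out of a qual.

	Node *leftOperand = NULL;
	Node *rightOperand = NULL;
	if (BinaryOpExpression(someClause, &leftOperand, &rightOperand) &&
		IsA(leftOperand, Var) && IsA(rightOperand, Const))
	{
		/* leftOperand is the column, rightOperand the value, coercions stripped */
	}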

/*
 * SimpleOpExpression checks that given expression is a simple operator
 * expression. A simple operator expression is a binary operator expression with
@@ -3210,23 +3297,14 @@ GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber)
bool
SimpleOpExpression(Expr *clause)
{
	Node *leftOperand = NULL;
	Node *rightOperand = NULL;
	Const *constantClause = NULL;

	if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
	Node *leftOperand;
	Node *rightOperand;
	if (!BinaryOpExpression(clause, &leftOperand, &rightOperand))
	{
		leftOperand = get_leftop(clause);
		rightOperand = get_rightop(clause);
		return false;
	}
	else
	{
		return false; /* not a binary opclause */
	}

	/* strip coercions before doing check */
	leftOperand = strip_implicit_coercions(leftOperand);
	rightOperand = strip_implicit_coercions(rightOperand);

	if (IsA(rightOperand, Const) && IsA(leftOperand, Var))
	{

@@ -3259,14 +3337,14 @@ SimpleOpExpression(Expr *clause)
bool
OpExpressionContainsColumn(OpExpr *operatorExpression, Var *partitionColumn)
{
	Node *leftOperand = get_leftop((Expr *) operatorExpression);
	Node *rightOperand = get_rightop((Expr *) operatorExpression);
	Node *leftOperand;
	Node *rightOperand;
	if (!BinaryOpExpression((Expr *) operatorExpression, &leftOperand, &rightOperand))
	{
		return false;
	}
	Var *column = NULL;

	/* strip coercions before doing check */
	leftOperand = strip_implicit_coercions(leftOperand);
	rightOperand = strip_implicit_coercions(rightOperand);

	if (IsA(leftOperand, Var))
	{
		column = (Var *) leftOperand;

@@ -3335,6 +3413,8 @@ UpdateConstraint(Node *baseConstraint, ShardInterval *shardInterval)
	Assert(shardInterval != NULL);
	Assert(shardInterval->minValueExists);
	Assert(shardInterval->maxValueExists);
	Assert(minNode != NULL);
	Assert(maxNode != NULL);
	Assert(IsA(minNode, Const));
	Assert(IsA(maxNode, Const));


@@ -4331,7 +4411,7 @@ GenerateSyntheticShardIntervalArray(int partitionCount)
	ShardInterval *shardInterval = CitusMakeNode(ShardInterval);

	/* calculate the split of the hash space */
	int32 shardMinHashToken = INT32_MIN + (shardIndex * hashTokenIncrement);
	int32 shardMinHashToken = PG_INT32_MIN + (shardIndex * hashTokenIncrement);
	int32 shardMaxHashToken = shardMinHashToken + (hashTokenIncrement - 1);

	shardInterval->relationId = InvalidOid;
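A worked example of the hash-space split above (values assumed; hashTokenIncrement is computed elsewhere, here taken to be UINT_MAX divided by the partition count):

	uint32 hashTokenIncrement = UINT_MAX / 4;			/* 1073741823 for 4 partitions */
	int32 shard0Min = PG_INT32_MIN;						/* -2147483648 */
	int32 shard0Max = shard0Min + (hashTokenIncrement - 1);	/* -1073741826 */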

@@ -158,11 +158,11 @@ static int CompareInsertValuesByShardId(const void *leftElement,
										const void *rightElement);
static List * SingleShardSelectTaskList(Query *query, uint64 jobId,
										List *relationShardList, List *placementList,
										uint64 shardId);
										uint64 shardId, bool parametersInQueryResolved);
static bool RowLocksOnRelations(Node *node, List **rtiLockList);
static List * SingleShardModifyTaskList(Query *query, uint64 jobId,
										List *relationShardList, List *placementList,
										uint64 shardId);
										uint64 shardId, bool parametersInQueryResolved);
static List * RemoveCoordinatorPlacement(List *placementList);
static void ReorderTaskPlacementsByTaskAssignmentPolicy(Job *job,
														TaskAssignmentPolicyType

@@ -1383,7 +1383,11 @@ TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, FromExpr *joinTre
	Const *newValue = (Const *) setExpr;
	List *restrictClauseList = WhereClauseList(joinTree);
	OpExpr *equalityExpr = MakeOpExpression(column, BTEqualStrategyNumber);
	Const *rightConst = (Const *) get_rightop((Expr *) equalityExpr);
	Node *rightOp = get_rightop((Expr *) equalityExpr);

	Assert(rightOp != NULL);
	Assert(IsA(rightOp, Const));
	Const *rightConst = (Const *) rightOp;

	rightConst->constvalue = newValue->constvalue;
	rightConst->constisnull = newValue->constisnull;

@@ -1443,7 +1447,9 @@ RouterInsertJob(Query *originalQuery, Query *query, DeferredErrorMessage **plann
	}
	else
	{
		taskList = RouterInsertTaskList(query, planningError);
		bool parametersInQueryResolved = false;

		taskList = RouterInsertTaskList(query, parametersInQueryResolved, planningError);
		if (*planningError)
		{
			return NULL;

@@ -1453,19 +1459,20 @@ RouterInsertJob(Query *originalQuery, Query *query, DeferredErrorMessage **plann
		requiresMasterEvaluation = RequiresMasterEvaluation(originalQuery);
	}

	Job *job = CreateJob(originalQuery);
	job->taskList = taskList;
	job->requiresMasterEvaluation = requiresMasterEvaluation;
	job->deferredPruning = deferredPruning;

	if (!requiresMasterEvaluation)
	{
		/* no functions or parameters, build the query strings upfront */
		RebuildQueryStrings(originalQuery, taskList);
		RebuildQueryStrings(job);

		/* remember the partition column value */
		partitionKeyValue = ExtractInsertPartitionKeyValue(originalQuery);
	}

	Job *job = CreateJob(originalQuery);
	job->taskList = taskList;
	job->requiresMasterEvaluation = requiresMasterEvaluation;
	job->deferredPruning = deferredPruning;
	job->partitionKeyValue = partitionKeyValue;

	return job;

@@ -1557,7 +1564,8 @@ ErrorIfNoShardsExist(DistTableCacheEntry *cacheEntry)
 * a distributed table via the router executor.
 */
List *
RouterInsertTaskList(Query *query, DeferredErrorMessage **planningError)
RouterInsertTaskList(Query *query, bool parametersInQueryResolved,
					 DeferredErrorMessage **planningError)
{
	List *insertTaskList = NIL;
	ListCell *modifyRouteCell = NULL;

@@ -1589,8 +1597,8 @@ RouterInsertTaskList(Query *query, DeferredErrorMessage **planningError)
		relationShard->relationId = distributedTableId;

		modifyTask->relationShardList = list_make1(relationShard);

		modifyTask->taskPlacementList = ShardPlacementList(modifyRoute->shardId);
		modifyTask->parametersInQueryStringResolved = parametersInQueryResolved;

		insertTaskList = lappend(insertTaskList, modifyTask);
	}

@@ -1768,7 +1776,8 @@ GenerateSingleShardRouterTaskList(Job *job, List *relationShardList,
	{
		job->taskList = SingleShardSelectTaskList(originalQuery, job->jobId,
												  relationShardList, placementList,
												  shardId);
												  shardId,
												  job->parametersInJobQueryResolved);

		/*
		 * Queries to reference tables, or distributed tables with multiple replicas have

@@ -1794,7 +1803,8 @@ GenerateSingleShardRouterTaskList(Job *job, List *relationShardList,
	{
		job->taskList = SingleShardModifyTaskList(originalQuery, job->jobId,
												  relationShardList, placementList,
												  shardId);
												  shardId,
												  job->parametersInJobQueryResolved);
	}
}


@@ -1886,7 +1896,8 @@ RemoveCoordinatorPlacement(List *placementList)
 */
static List *
SingleShardSelectTaskList(Query *query, uint64 jobId, List *relationShardList,
						  List *placementList, uint64 shardId)
						  List *placementList, uint64 shardId,
						  bool parametersInQueryResolved)
{
	Task *task = CreateTask(SELECT_TASK);
	List *relationRowLockList = NIL;

@@ -1904,6 +1915,7 @@ SingleShardSelectTaskList(Query *query, uint64 jobId, List *relationShardList,
	task->jobId = jobId;
	task->relationShardList = relationShardList;
	task->relationRowLockList = relationRowLockList;
	task->parametersInQueryStringResolved = parametersInQueryResolved;

	return list_make1(task);
}

@@ -1956,13 +1968,15 @@ RowLocksOnRelations(Node *node, List **relationRowLockList)
 */
static List *
SingleShardModifyTaskList(Query *query, uint64 jobId, List *relationShardList,
						  List *placementList, uint64 shardId)
						  List *placementList, uint64 shardId,
						  bool parametersInQueryResolved)
{
	Task *task = CreateTask(MODIFY_TASK);
	List *rangeTableList = NIL;

	ExtractRangeTableEntryWalker((Node *) query, &rangeTableList);
	RangeTblEntry *updateOrDeleteRTE = GetUpdateOrDeleteRTE(query);
	Assert(updateOrDeleteRTE != NULL);

	DistTableCacheEntry *modificationTableCacheEntry = DistributedTableCacheEntry(
		updateOrDeleteRTE->relid);

@@ -1982,6 +1996,7 @@ SingleShardModifyTaskList(Query *query, uint64 jobId, List *relationShardList,
	task->jobId = jobId;
	task->relationShardList = relationShardList;
	task->replicationModel = modificationTableCacheEntry->replicationModel;
	task->parametersInQueryStringResolved = parametersInQueryResolved;

	return list_make1(task);
}

@@ -2091,8 +2106,9 @@ PlanRouterQuery(Query *originalQuery,
		plannerRestrictionContext->fastPathRestrictionContext->distributionKeyValue;

	List *shardIntervalList =
		TargetShardIntervalForFastPathQuery(originalQuery, partitionValueConst,
											&isMultiShardQuery, distributionKeyValue);
		TargetShardIntervalForFastPathQuery(originalQuery, &isMultiShardQuery,
											distributionKeyValue,
											partitionValueConst);

	/*
	 * This could only happen when there is a parameter on the distribution key.

@@ -2375,25 +2391,37 @@ GetAnchorShardId(List *prunedShardIntervalListList)
 * one list with a single shard interval (see FastPathRouterQuery()
 * for the details).
 *
 * Also set the outgoing partition column value if requested via
 * partitionValueConst
 * If the caller requested the distributionKey value that this function
 * yields, set outputPartitionValueConst.
 */
List *
TargetShardIntervalForFastPathQuery(Query *query, Const **partitionValueConst,
									bool *isMultiShardQuery, Const *distributionKeyValue)
TargetShardIntervalForFastPathQuery(Query *query, bool *isMultiShardQuery,
									Const *inputDistributionKeyValue,
									Const **outputPartitionValueConst)
{
	Oid relationId = ExtractFirstDistributedTableId(query);

	if (distributionKeyValue)
	if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
	{
		/* we don't need to do shard pruning for reference tables */
		return list_make1(LoadShardIntervalList(relationId));
	}

	if (inputDistributionKeyValue && !inputDistributionKeyValue->constisnull)
	{
		DistTableCacheEntry *cache = DistributedTableCacheEntry(relationId);
		ShardInterval *shardInterval =
			FindShardInterval(distributionKeyValue->constvalue, cache);
			FindShardInterval(inputDistributionKeyValue->constvalue, cache);
		if (shardInterval == NULL)
		{
			ereport(ERROR, (errmsg(
								"could not find shardinterval to which to send the query")));
		}

		if (partitionValueConst != NULL)
		if (outputPartitionValueConst != NULL)
		{
			/* set the outgoing partition column value if requested */
			*partitionValueConst = distributionKeyValue;
			*outputPartitionValueConst = inputDistributionKeyValue;
		}
		List *shardIntervalList = list_make1(shardInterval);

@@ -2402,10 +2430,24 @@ TargetShardIntervalForFastPathQuery(Query *query, Const **partitionValueConst,

	Node *quals = query->jointree->quals;
	int relationIndex = 1;
	Const *queryPartitionValueConst = NULL;

	/*
	 * We couldn't do the shard pruning based on inputDistributionKeyValue as it might
	 * be passed as NULL. Still, we can search the quals for the distribution key.
	 */
	Const *distributionKeyValueInQuals = NULL;
	List *prunedShardIntervalList =
		PruneShards(relationId, relationIndex, make_ands_implicit((Expr *) quals),
					&queryPartitionValueConst);
					&distributionKeyValueInQuals);

	if (!distributionKeyValueInQuals || distributionKeyValueInQuals->constisnull)
	{
		/*
		 * If the distribution key equals NULL, we prefer to treat it as a zero shard
		 * query as it cannot return any rows.
		 */
		return NIL;
	}

	/* we're only expecting a single shard from a single table */
	Node *distKey PG_USED_FOR_ASSERTS_ONLY = NULL;

@@ -2416,10 +2458,10 @@ TargetShardIntervalForFastPathQuery(Query *query, Const **partitionValueConst,
		*isMultiShardQuery = true;
	}
	else if (list_length(prunedShardIntervalList) == 1 &&
			 partitionValueConst != NULL)
			 outputPartitionValueConst != NULL)
	{
		/* set the outgoing partition column value if requested */
		*partitionValueConst = queryPartitionValueConst;
		*outputPartitionValueConst = distributionKeyValueInQuals;
	}

	return list_make1(prunedShardIntervalList);

@@ -2701,9 +2743,10 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
	OpExpr *equalityExpr = MakeOpExpression(partitionColumn,
											BTEqualStrategyNumber);
	Node *rightOp = get_rightop((Expr *) equalityExpr);
	Const *rightConst = (Const *) rightOp;

	Assert(rightOp != NULL);
	Assert(IsA(rightOp, Const));
	Const *rightConst = (Const *) rightOp;

	rightConst->constvalue = partitionValueConst->constvalue;
	rightConst->constisnull = partitionValueConst->constisnull;

@@ -3272,8 +3315,7 @@ CopyRelationRestrictionContext(RelationRestrictionContext *oldContext)

	/* can't be copied, we copy (flatly) a RelOptInfo, and then decouple baserestrictinfo */
	newRestriction->relOptInfo = palloc(sizeof(RelOptInfo));
	memcpy(newRestriction->relOptInfo, oldRestriction->relOptInfo,
		   sizeof(RelOptInfo));
	*newRestriction->relOptInfo = *oldRestriction->relOptInfo;

	newRestriction->relOptInfo->baserestrictinfo =
		copyObject(oldRestriction->relOptInfo->baserestrictinfo);
@@ -81,9 +81,9 @@ static bool ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo);
static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo,
												RelOptInfo *relationInfo,
												RecurringTuplesType *recurType);
static bool IsRecurringRTE(RangeTblEntry *rangeTableEntry,
						   RecurringTuplesType *recurType);
static bool IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType);
static bool ContainsRecurringRTE(RangeTblEntry *rangeTableEntry,
								 RecurringTuplesType *recurType);
static bool ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType);
static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType);
static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree);
static List * FlattenJoinVars(List *columnList, Query *queryTree);

@@ -751,7 +751,7 @@ FromClauseRecurringTupleType(Query *queryTree)
	 * Try to figure out which type of recurring tuples we have to produce a
	 * relevant error message. If there are several we'll pick the first one.
	 */
	IsRecurringRangeTable(queryTree->rtable, &recurType);
	ContainsRecurringRangeTable(queryTree->rtable, &recurType);

	return recurType;
}

@@ -1336,7 +1336,6 @@ static bool
RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo,
										RelOptInfo *relationInfo)
{
	RecurringTuplesType recurType;
	Relids relids = bms_copy(relationInfo->relids);
	int relationId = -1;


@@ -1344,11 +1343,19 @@ RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo,
	{
		RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];

		/* relationInfo has this range table entry */
		if (!IsRecurringRTE(rangeTableEntry, &recurType))
		if (FindNodeCheckInRangeTableList(list_make1(rangeTableEntry),
										  IsDistributedTableRTE))
		{
			/* we already found a distributed table, no need to check further */
			return false;
		}

		/*
		 * If there are no distributed tables, there should be at least
		 * one recurring rte.
		 */
		RecurringTuplesType recurType PG_USED_FOR_ASSERTS_ONLY;
		Assert(ContainsRecurringRTE(rangeTableEntry, &recurType));
	}

	return true;

@@ -1376,7 +1383,7 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
		RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];

		/* relationInfo has this range table entry */
		if (IsRecurringRTE(rangeTableEntry, recurType))
		if (ContainsRecurringRTE(rangeTableEntry, recurType))
		{
			return true;
		}

@@ -1387,24 +1394,24 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati


/*
 * IsRecurringRTE returns whether the range table entry will generate
 * the same set of tuples when repeating it in a query on different
 * shards.
 * ContainsRecurringRTE returns whether the range table entry contains
 * any entry that generates the same set of tuples when repeating it in
 * a query on different shards.
 */
static bool
IsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType)
ContainsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType)
{
	return IsRecurringRangeTable(list_make1(rangeTableEntry), recurType);
	return ContainsRecurringRangeTable(list_make1(rangeTableEntry), recurType);
}


/*
 * IsRecurringRangeTable returns whether the range table will generate
 * the same set of tuples when repeating it in a query on different
 * shards.
 * ContainsRecurringRangeTable returns whether the range table list contains
 * any entry that generates the same set of tuples when repeating it in
 * a query on different shards.
 */
static bool
IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType)
ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType)
{
	return range_table_walker(rangeTable, HasRecurringTuples, recurType,
							  QTW_EXAMINE_RTES_BEFORE);
@@ -59,6 +59,7 @@
#include "distributed/commands/multi_copy.h"
#include "distributed/distributed_planner.h"
#include "distributed/errormessage.h"
#include "distributed/listutils.h"
#include "distributed/log_utils.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_logical_planner.h"

@@ -147,7 +148,7 @@ static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
														 colocatedJoinChecker,
														 RecursivePlanningContext *
														 recursivePlanningContext);
static List * SublinkList(Query *originalQuery);
static List * SublinkListFromWhere(Query *originalQuery);
static bool ExtractSublinkWalker(Node *node, List **sublinkList);
static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query);
static bool RecursivelyPlanAllSubqueries(Node *node,

@@ -173,6 +174,7 @@ static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *contex
static bool ContainsReferencesToOuterQuery(Query *query);
static bool ContainsReferencesToOuterQueryWalker(Node *node,
												 VarLevelsUpWalkerContext *context);
static bool NodeContainsSubqueryReferencingOuterQuery(Node *node);
static void WrapFunctionsInSubqueries(Query *query);
static void TransformFunctionRTE(RangeTblEntry *rangeTblEntry);
static bool ShouldTransformRTE(RangeTblEntry *rangeTableEntry);

@@ -314,6 +316,18 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
		RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context);
	}

	if (query->havingQual != NULL)
	{
		if (NodeContainsSubqueryReferencingOuterQuery(query->havingQual))
		{
			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
								 "Subqueries in HAVING cannot refer to outer query",
								 NULL, NULL);
		}

		RecursivelyPlanAllSubqueries(query->havingQual, context);
	}

	/*
	 * If the query doesn't have distribution key equality,
	 * recursively plan some of its subqueries.

@@ -528,7 +542,7 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
											 RecursivePlanningContext *
											 recursivePlanningContext)
{
	List *sublinkList = SublinkList(query);
	List *sublinkList = SublinkListFromWhere(query);
	ListCell *sublinkCell = NULL;

	foreach(sublinkCell, sublinkList)

@@ -551,12 +565,12 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,


/*
 * SublinkList finds the subquery nodes in the where clause of the given query. Note
 * SublinkListFromWhere finds the subquery nodes in the where clause of the given query. Note
 * that the function should be called on the original query given that postgres
 * standard_planner() may convert the subqueries in WHERE clause to joins.
 */
static List *
SublinkList(Query *originalQuery)
SublinkListFromWhere(Query *originalQuery)
{
	FromExpr *joinTree = originalQuery->jointree;
	List *sublinkList = NIL;

@@ -648,8 +662,7 @@ RecursivelyPlanAllSubqueries(Node *node, RecursivePlanningContext *planningConte
	if (IsA(node, Query))
	{
		Query *query = (Query *) node;

		if (FindNodeCheckInRangeTableList(query->rtable, IsDistributedTableRTE))
		if (FindNodeCheckInRangeTableList(query->rtable, IsCitusTableRTE))
		{
			RecursivelyPlanSubquery(query, planningContext);
		}

@@ -1025,7 +1038,7 @@ RecursivelyPlanSetOperations(Query *query, Node *node,
	Query *subquery = rangeTableEntry->subquery;

	if (rangeTableEntry->rtekind == RTE_SUBQUERY &&
		QueryContainsDistributedTableRTE(subquery))
		FindNodeCheck((Node *) subquery, IsDistributedTableRTE))
	{
		RecursivelyPlanSubquery(subquery, context);
	}

@@ -1142,7 +1155,7 @@ RecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *planningConte
	}

	/* finally update the input subquery to point to the result query */
	memcpy(subquery, resultQuery, sizeof(Query));
	*subquery = *resultQuery;
}


@@ -1223,7 +1236,7 @@ CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context)

/*
 * ContainsReferencesToOuterQuery determines whether the given query contains
 * any Vars that point outside of the query itself. Such queries cannot be
 * anything that points outside of the query itself. Such queries cannot be
 * planned recursively.
 */
static bool

@@ -1302,6 +1315,29 @@ ContainsReferencesToOuterQueryWalker(Node *node, VarLevelsUpWalkerContext *conte
}


/*
 * NodeContainsSubqueryReferencingOuterQuery determines whether the given node
 * contains anything that points outside of the query itself.
 */
static bool
NodeContainsSubqueryReferencingOuterQuery(Node *node)
{
	List *sublinks = NIL;
	ExtractSublinkWalker(node, &sublinks);

	SubLink *sublink;
	foreach_ptr(sublink, sublinks)
	{
		if (ContainsReferencesToOuterQuery(castNode(Query, sublink->subselect)))
		{
			return true;
		}
	}

	return false;
}


/*
 * WrapFunctionsInSubqueries iterates over all the immediate Range Table Entries
 * of a query and wraps the functions inside (SELECT * FROM fnc() f)

@@ -1397,7 +1433,11 @@ TransformFunctionRTE(RangeTblEntry *rangeTblEntry)
	 *
	 * We will iterate over Tuple Description attributes, i.e. (c1 int, c2 text)
	 */
	for (targetColumnIndex = 0; targetColumnIndex < tupleDesc->natts;
	if (tupleDesc->natts > MaxAttrNumber)
	{
		ereport(ERROR, (errmsg("bad number of tuple descriptor attributes")));
	}
	for (targetColumnIndex = 0; targetColumnIndex < (AttrNumber) tupleDesc->natts;
		 targetColumnIndex++)
	{
		FormData_pg_attribute *attribute = TupleDescAttr(tupleDesc,
@@ -326,6 +326,7 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
			continue;
		}

		Assert(varToBeAdded != NULL);
		AddToAttributeEquivalenceClass(&attributeEquivalance, relationPlannerRoot,
									   varToBeAdded);
	}

@@ -409,7 +410,7 @@ FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex,
		return NULL;
	}

	Var *relationPartitionKey = DistPartitionKey(relationOid);
	Var *relationPartitionKey = ForceDistPartitionKey(relationOid);

	List *translaterVars = targetAppendRelInfo->translated_vars;
	foreach(translatedVarCell, translaterVars)
@@ -151,6 +151,23 @@ typedef union \
typedef FunctionCallInfoData FunctionCall2InfoData;
#endif

/*
 * We also ignore this warning in ./configure, but that's not always enough.
 * The flags used during compilation are determined by the compiler support
 * ./configure detects, which is usually GCC. This warning is only present in
 * clang, so it would normally be fine to not use it with GCC. The problem is
 * that clang is used to compile the JIT bitcode when postgres is compiled
 * with --with-llvm. So in the end both clang and GCC are used to compile the
 * project.
 *
 * So the flag is not provided on the command line, because ./configure notices
 * that GCC doesn't support it. But this warning persists when compiling the
 * bitcode, which is why we ignore it here explicitly.
 */
#ifdef __clang__
#pragma clang diagnostic ignored "-Wgnu-variable-sized-type-not-at-end"
#endif /* __clang__ */

/*
 * Data necessary to perform a single PruneShards().
 */

@@ -749,13 +766,21 @@ AddPartitionKeyRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opCla
											 constantClause);
		if (constantClause == NULL)
		{
			/* couldn't coerce value, so we note this as a restriction we don't grok */
			/* couldn't coerce value, so we save it in otherRestrictions */
			prune->otherRestrictions = lappend(prune->otherRestrictions, opClause);

			return;
		}
	}

	if (constantClause->constisnull)
	{
		/* we cannot do pruning for NULL values, so we save it in otherRestrictions */
		prune->otherRestrictions = lappend(prune->otherRestrictions, opClause);

		return;
	}

	/* at this point, we'd better be able to pass binary Datums to comparison functions */
	Assert(IsBinaryCoercible(constantClause->consttype, partitionColumn->vartype));

@@ -973,7 +998,7 @@ CopyPartialPruningInstance(PruningInstance *sourceInstance)
	 * being partial - if necessary it'll be marked so again by
	 * PrunableExpressionsWalker().
	 */
	memcpy(newInstance, sourceInstance, sizeof(PruningInstance));
	*newInstance = *sourceInstance;
	newInstance->addedToPruningInstances = false;
	newInstance->isPartial = false;
@@ -0,0 +1,248 @@
/*-------------------------------------------------------------------------
 *
 * tdigest_extension.c
 *    Helper functions to get access to tdigest specific data.
 *
 * Copyright (c) Citus Data, Inc.
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/htup_details.h"
#include "catalog/pg_extension.h"
#include "catalog/pg_type.h"
#include "distributed/metadata_cache.h"
#include "distributed/tdigest_extension.h"
#include "parser/parse_func.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"


static Oid LookupTDigestFunction(const char *functionName, int argcount, Oid *argtypes);

/*
 * TDigestExtensionSchema finds the schema the tdigest extension is installed in. The
 * function will return InvalidOid if the extension is not installed.
 */
Oid
TDigestExtensionSchema()
{
	ScanKeyData entry[1];
	Form_pg_extension extensionForm = NULL;
	Oid tdigestExtensionSchema = InvalidOid;

	Relation relation = heap_open(ExtensionRelationId, AccessShareLock);

	ScanKeyInit(&entry[0],
				Anum_pg_extension_extname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum("tdigest"));

	SysScanDesc scandesc = systable_beginscan(relation, ExtensionNameIndexId, true,
											  NULL, 1, entry);

	HeapTuple extensionTuple = systable_getnext(scandesc);

	/*
	 * We assume that there can be at most one matching tuple; if no tuple is found
	 * the extension is not installed. The value of InvalidOid will not be changed.
	 */
	if (HeapTupleIsValid(extensionTuple))
	{
		extensionForm = (Form_pg_extension) GETSTRUCT(extensionTuple);
		tdigestExtensionSchema = extensionForm->extnamespace;
		Assert(OidIsValid(tdigestExtensionSchema));
	}

	systable_endscan(scandesc);

	heap_close(relation, AccessShareLock);

	return tdigestExtensionSchema;
}


/*
 * TDigestExtensionTypeOid performs a lookup for the Oid of the type representing the
 * tdigest as installed by the tdigest extension. Returns InvalidOid if the type
 * cannot be found.
 */
Oid
TDigestExtensionTypeOid()
{
	Oid tdigestSchemaOid = TDigestExtensionSchema();
	if (!OidIsValid(tdigestSchemaOid))
	{
		return InvalidOid;
	}
	char *namespaceName = get_namespace_name(tdigestSchemaOid);
	return LookupTypeOid(namespaceName, "tdigest");
}


/*
 * LookupTDigestFunction is a helper function specifically to look up functions in the
 * namespace/schema where the tdigest extension is installed. This makes the lookup of
 * the following aggregate functions easier and less repetitive.
 */
static Oid
LookupTDigestFunction(const char *functionName, int argcount, Oid *argtypes)
{
	Oid tdigestSchemaOid = TDigestExtensionSchema();
	if (!OidIsValid(tdigestSchemaOid))
	{
		return InvalidOid;
	}

	char *namespaceName = get_namespace_name(tdigestSchemaOid);
	return LookupFuncName(
		list_make2(makeString(namespaceName), makeString(pstrdup(functionName))),
		argcount, argtypes, true);
}


/*
 * TDigestExtensionAggTDigest1 performs a lookup for the Oid of the tdigest aggregate;
 * tdigest(tdigest)
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigest1()
{
	return LookupTDigestFunction("tdigest", 1, (Oid[]) { TDigestExtensionTypeOid() });
}


/*
 * TDigestExtensionAggTDigest2 performs a lookup for the Oid of the tdigest aggregate;
 * tdigest(value double precision, compression int)
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigest2()
{
	return LookupTDigestFunction("tdigest", 2, (Oid[]) { FLOAT8OID, INT4OID });
}


/*
 * TDigestExtensionAggTDigestPercentile2 performs a lookup for the Oid of the tdigest
 * aggregate;
 * tdigest_percentile(tdigest, double precision)
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigestPercentile2()
{
	return LookupTDigestFunction("tdigest_percentile", 2,
								 (Oid[]) { TDigestExtensionTypeOid(), FLOAT8OID });
}


/*
 * TDigestExtensionAggTDigestPercentile2a performs a lookup for the Oid of the tdigest
 * aggregate;
 * tdigest_percentile(tdigest, double precision[])
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigestPercentile2a(void)
{
	return LookupTDigestFunction("tdigest_percentile", 2,
								 (Oid[]) { TDigestExtensionTypeOid(), FLOAT8ARRAYOID });
}


/*
 * TDigestExtensionAggTDigestPercentile3 performs a lookup for the Oid of the tdigest
 * aggregate;
 * tdigest_percentile(double precision, int, double precision)
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigestPercentile3()
{
	return LookupTDigestFunction("tdigest_percentile", 3,
								 (Oid[]) { FLOAT8OID, INT4OID, FLOAT8OID });
}


/*
 * TDigestExtensionAggTDigestPercentile3a performs a lookup for the Oid of the tdigest
 * aggregate;
 * tdigest_percentile(double precision, int, double precision[])
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigestPercentile3a(void)
{
	return LookupTDigestFunction("tdigest_percentile", 3,
								 (Oid[]) { FLOAT8OID, INT4OID, FLOAT8ARRAYOID });
}


/*
 * TDigestExtensionAggTDigestPercentileOf2 performs a lookup for the Oid of the tdigest
 * aggregate;
 * tdigest_percentile_of(tdigest, double precision)
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigestPercentileOf2()
{
	return LookupTDigestFunction("tdigest_percentile_of", 2,
								 (Oid[]) { TDigestExtensionTypeOid(), FLOAT8OID });
}


/*
 * TDigestExtensionAggTDigestPercentileOf2a performs a lookup for the Oid of the tdigest
 * aggregate;
 * tdigest_percentile_of(tdigest, double precision[])
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigestPercentileOf2a(void)
{
	return LookupTDigestFunction("tdigest_percentile_of", 2,
								 (Oid[]) { TDigestExtensionTypeOid(), FLOAT8ARRAYOID });
}


/*
 * TDigestExtensionAggTDigestPercentileOf3 performs a lookup for the Oid of the tdigest
 * aggregate;
 * tdigest_percentile_of(double precision, int, double precision)
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigestPercentileOf3()
{
	return LookupTDigestFunction("tdigest_percentile_of", 3,
								 (Oid[]) { FLOAT8OID, INT4OID, FLOAT8OID });
}


/*
 * TDigestExtensionAggTDigestPercentileOf3a performs a lookup for the Oid of the tdigest
 * aggregate;
 * tdigest_percentile_of(double precision, int, double precision[])
 *
 * If the aggregate is not found InvalidOid is returned.
 */
Oid
TDigestExtensionAggTDigestPercentileOf3a(void)
{
	return LookupTDigestFunction("tdigest_percentile_of", 3,
								 (Oid[]) { FLOAT8OID, INT4OID, FLOAT8ARRAYOID });
}
@@ -29,6 +29,7 @@
#include "catalog/namespace.h"
#include "catalog/pg_class.h"
#include "catalog/pg_constraint.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/commands.h"
#include "distributed/metadata_cache.h"
#include "distributed/relay_utility.h"

@@ -694,8 +695,8 @@ AppendShardIdToName(char **name, uint64 shardId)
						NAMEDATALEN)));
	}

	snprintf(shardIdAndSeparator, NAMEDATALEN, "%c" UINT64_FORMAT,
			 SHARD_NAME_SEPARATOR, shardId);
	SafeSnprintf(shardIdAndSeparator, NAMEDATALEN, "%c" UINT64_FORMAT,
				 SHARD_NAME_SEPARATOR, shardId);
	int shardIdAndSeparatorLength = strlen(shardIdAndSeparator);

	/*

@@ -705,7 +706,7 @@ AppendShardIdToName(char **name, uint64 shardId)

	if (nameLength < (NAMEDATALEN - shardIdAndSeparatorLength))
	{
		snprintf(extendedName, NAMEDATALEN, "%s%s", (*name), shardIdAndSeparator);
		SafeSnprintf(extendedName, NAMEDATALEN, "%s%s", (*name), shardIdAndSeparator);
	}

	/*

@@ -739,14 +740,14 @@ AppendShardIdToName(char **name, uint64 shardId)
		multiByteClipLength = pg_mbcliplen(*name, nameLength, (NAMEDATALEN -
															   shardIdAndSeparatorLength -
															   10));
		snprintf(extendedName, NAMEDATALEN, "%.*s%c%.8x%s",
				 multiByteClipLength, (*name),
				 SHARD_NAME_SEPARATOR, longNameHash,
				 shardIdAndSeparator);
		SafeSnprintf(extendedName, NAMEDATALEN, "%.*s%c%.8x%s",
					 multiByteClipLength, (*name),
					 SHARD_NAME_SEPARATOR, longNameHash,
					 shardIdAndSeparator);
	}

	(*name) = (char *) repalloc((*name), NAMEDATALEN);
	int neededBytes = snprintf((*name), NAMEDATALEN, "%s", extendedName);
	int neededBytes = SafeSnprintf((*name), NAMEDATALEN, "%s", extendedName);
	if (neededBytes < 0)
	{
		ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY),
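A hypothetical usage sketch of AppendShardIdToName (the separator character and resulting name are assumptions for illustration):

	char *shardName = pstrdup("orders");
	AppendShardIdToName(&shardName, 102008);
	/* with SHARD_NAME_SEPARATOR == '_', shardName is now "orders_102008" */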

@@ -0,0 +1 @@
../../../vendor/safestringlib/safeclib/
@@ -13,7 +13,7 @@
#include <sys/stat.h>
#include <sys/types.h>

/* necessary to get alloca() on illumos */
/* necessary to get alloca on illumos */
#ifdef __sun
#include <alloca.h>
#endif

@@ -21,11 +21,14 @@
#include "fmgr.h"
#include "miscadmin.h"

#include "safe_lib.h"

#include "citus_version.h"
#include "commands/explain.h"
#include "executor/executor.h"
#include "distributed/backend_data.h"
#include "distributed/citus_nodefuncs.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/commands.h"
#include "distributed/commands/multi_copy.h"
#include "distributed/commands/utility_hook.h"

@@ -186,6 +189,14 @@ _PG_init(void)
							   "shared_preload_libraries.")));
	}

	/*
	 * Register constraint_handler hooks of safestringlib first. This way
	 * loading the extension will error out if one of these constraints is hit
	 * during load.
	 */
	set_str_constraint_handler_s(ereport_constraint_handler);
	set_mem_constraint_handler_s(ereport_constraint_handler);
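ereport_constraint_handler itself is defined elsewhere in the patch; a minimal sketch, assuming safestringlib's constraint handler signature (const char *, void *, errno_t), could look like:

	static void
	ereport_constraint_handler(const char *message, void *ptr, errno_t error)
	{
		/* turn a safestringlib constraint violation into a postgres ERROR */
		ereport(ERROR, (errmsg("safestringlib constraint violation: %s",
							   message ? message : "unknown")));
	}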

	/*
	 * Perform checks before registering any hooks, to avoid erroring out in a
	 * partial state.

@@ -289,7 +300,13 @@ ResizeStackToMaximumDepth(void)
#ifndef WIN32
	long max_stack_depth_bytes = max_stack_depth * 1024L;

	volatile char *stack_resizer = alloca(max_stack_depth_bytes);
	/*
	 * Explanation of IGNORE-BANNED:
	 * alloca is safe to use here since we limit the allocated size. We cannot
	 * use malloc as a replacement, since we actually want to grow the stack
	 * here.
	 */
	volatile char *stack_resizer = alloca(max_stack_depth_bytes); /* IGNORE-BANNED */

	/*
	 * Different architectures might have different directions while

@@ -1438,6 +1455,12 @@ NodeConninfoGucAssignHook(const char *newval, void *extra)
		newval = "";
	}

	if (strcmp(newval, NodeConninfo) == 0)
	{
		/* It did not change, no need to do anything */
		return;
	}

	PQconninfoOption *optionArray = PQconninfoParse(newval, NULL);
	if (optionArray == NULL)
	{

@@ -1459,6 +1482,14 @@ NodeConninfoGucAssignHook(const char *newval, void *extra)
	}

	PQconninfoFree(optionArray);

	/*
	 * Mark all connections for shutdown, since they have been opened using old
	 * connection settings. This is mostly important when changing SSL
	 * parameters, otherwise these would not be applied and connections could
	 * be unencrypted when the user doesn't want that.
	 */
	CloseAllConnectionsAfterTransaction();
}
@@ -0,0 +1,2 @@
/* we have some issues with versioning, and we're fixing it by bumping the version */
/* from 9.2-2 to 9.2-4, see #3673 for details */

@@ -0,0 +1,18 @@
/* citus--9.3-1--9.2-4 */
/* this is an unusual upgrade path, we are doing it because */
/* we have accidentally tagged master branch with v9.2-3 */
/* however master branch was already bumped to v9.3-1 */
/* with this file, we are undoing the catalog changes that */
/* have happened between 9.2-2 and 9.3-1, and making 9.2-4 */
/* as the release that we can */

-- undo the changes for citus_extradata_container that happened on citus 9.3
DROP FUNCTION IF EXISTS pg_catalog.citus_extradata_container(INTERNAL);
CREATE FUNCTION pg_catalog.citus_extradata_container(INTERNAL)
    RETURNS void
    LANGUAGE C
    AS 'MODULE_PATHNAME', $$citus_extradata_container$$;
COMMENT ON FUNCTION pg_catalog.citus_extradata_container(INTERNAL)
    IS 'placeholder function to store additional data in postgres node trees';

DROP FUNCTION IF EXISTS pg_catalog.update_distributed_table_colocation(regclass, text);
@@ -219,8 +219,8 @@ create_monolithic_shard_row(PG_FUNCTION_ARGS)
StringInfo maxInfo = makeStringInfo();
uint64 newShardId = GetNextShardId();

appendStringInfo(minInfo, "%d", INT32_MIN);
appendStringInfo(maxInfo, "%d", INT32_MAX);
appendStringInfo(minInfo, "%d", PG_INT32_MIN);
appendStringInfo(maxInfo, "%d", PG_INT32_MAX);

text *minInfoText = cstring_to_text(minInfo->data);
text *maxInfoText = cstring_to_text(maxInfo->data);

@@ -46,7 +46,7 @@ store_intermediate_result_on_node(PG_FUNCTION_ARGS)

CheckCitusVersion(ERROR);

WorkerNode *workerNode = FindWorkerNode(nodeNameString, nodePort);
WorkerNode *workerNode = ForceFindWorkerNode(nodeNameString, nodePort);

/*
 * Make sure that this transaction has a distributed transaction ID.

@@ -173,7 +173,11 @@ MakeTextPartitionExpression(Oid distributedTableId, text *value)
if (value != NULL)
{
OpExpr *equalityExpr = MakeOpExpression(partitionColumn, BTEqualStrategyNumber);
Const *rightConst = (Const *) get_rightop((Expr *) equalityExpr);
Node *rightOp = get_rightop((Expr *) equalityExpr);

Assert(rightOp != NULL);
Assert(IsA(rightOp, Const));
Const *rightConst = (Const *) rightOp;

rightConst->constvalue = (Datum) value;
rightConst->constisnull = false;

@@ -0,0 +1,56 @@
/*-------------------------------------------------------------------------
 *
 * xact_stats.c
 *
 * This file contains functions to provide helper UDFs for testing transaction
 * statistics.
 *
 * Copyright (c) Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */
#include <sys/stat.h>
#include <unistd.h>

#include "postgres.h"
#include "funcapi.h"
#include "libpq-fe.h"
#include "miscadmin.h"
#include "pgstat.h"

static Size MemoryContextTotalSpace(MemoryContext context);

PG_FUNCTION_INFO_V1(top_transaction_context_size);

/*
 * top_transaction_context_size returns the current size of TopTransactionContext.
 */
Datum
top_transaction_context_size(PG_FUNCTION_ARGS)
{
Size totalSpace = MemoryContextTotalSpace(TopTransactionContext);
PG_RETURN_INT64(totalSpace);
}

/*
 * MemoryContextTotalSpace returns the total space allocated in the given
 * context and its children.
 */
static Size
MemoryContextTotalSpace(MemoryContext context)
{
Size totalSpace = 0;

MemoryContextCounters totals = { 0 };
context->methods->stats(context, NULL, NULL, &totals);
totalSpace += totals.totalspace;

for (MemoryContext child = context->firstchild;
child != NULL;
child = child->nextchild)
{
totalSpace += MemoryContextTotalSpace(child);
}

return totalSpace;
}

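For ad-hoc debugging of the same question, Postgres already ships a helper: MemoryContextStats() dumps a whole context subtree, with per-context totals, to stderr. A minimal sketch, assuming it runs in a backend where TopTransactionContext is valid:

#include "postgres.h"
#include "utils/memutils.h"

/* Print allocation statistics of the transaction context tree to stderr. */
static void
DumpTopTransactionContext(void)
{
	/* walks the context and all of its children, printing per-context totals */
	MemoryContextStats(TopTransactionContext);
}
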
@@ -806,7 +806,7 @@ GetBackendDataForProc(PGPROC *proc, BackendData *result)

SpinLockAcquire(&backendData->mutex);

memcpy(result, backendData, sizeof(BackendData));
*result = *backendData;

SpinLockRelease(&backendData->mutex);
}

@@ -543,6 +543,11 @@ ReplaceInitiatorNodeIdentifier(int initiator_node_identifier,

/* a query should run on an existing node */
Assert(nodeExists);
if (initiatorWorkerNode == NULL)
{
ereport(ERROR, (errmsg("no primary node found for group %d",
initiator_node_identifier)));
}
citusDistStat->master_query_host_name =
cstring_to_text(initiatorWorkerNode->workerName);
citusDistStat->master_query_host_port = initiatorWorkerNode->workerPort;

@@ -611,7 +616,7 @@ LocalNodeCitusDistStat(const char *statQuery, const char *hostname, int port)
 */
oldContext = MemoryContextSwitchTo(upperContext);

for (uint32 rowIndex = 0; rowIndex < SPI_processed; rowIndex++)
for (uint64 rowIndex = 0; rowIndex < SPI_processed; rowIndex++)
{
TupleDesc rowDescriptor = SPI_tuptable->tupdesc;

@@ -16,6 +16,7 @@

#include "access/xact.h"
#include "distributed/backend_data.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/connection_management.h"
#include "distributed/metadata_cache.h"
#include "distributed/remote_commands.h"

@@ -1330,9 +1331,9 @@ Assign2PCIdentifier(MultiConnection *connection)
uint64 transactionNumber = CurrentDistributedTransactionNumber();

/* print all numbers as unsigned to guarantee no minus symbols appear in the name */
snprintf(connection->remoteTransaction.preparedName, NAMEDATALEN,
PREPARED_TRANSACTION_NAME_FORMAT, GetLocalGroupId(), MyProcPid,
transactionNumber, connectionNumber++);
SafeSnprintf(connection->remoteTransaction.preparedName, NAMEDATALEN,
PREPARED_TRANSACTION_NAME_FORMAT, GetLocalGroupId(), MyProcPid,
transactionNumber, connectionNumber++);
}

@@ -20,6 +20,7 @@
#include "access/twophase.h"
#include "access/xact.h"
#include "distributed/backend_data.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/connection_management.h"
#include "distributed/distributed_planner.h"
#include "distributed/hash_helpers.h"

@@ -538,7 +539,7 @@ AdjustMaxPreparedTransactions(void)
{
char newvalue[12];

snprintf(newvalue, sizeof(newvalue), "%d", MaxConnections * 2);
SafeSnprintf(newvalue, sizeof(newvalue), "%d", MaxConnections * 2);

SetConfigOption("max_prepared_transactions", newvalue, PGC_POSTMASTER,
PGC_S_OVERRIDE);

@@ -555,7 +556,17 @@ AdjustMaxPreparedTransactions(void)
static void
PushSubXact(SubTransactionId subId)
{
MemoryContext old_context = MemoryContextSwitchTo(CurTransactionContext);
/*
 * We need to allocate these in TopTransactionContext instead of the current
 * subxact's memory context. This is because AtSubCommit_Memory won't
 * delete the subxact's memory context unless it is empty, and this
 * can cause memory leaks. For emptiness it just checks if the memory
 * has been reset, and we cannot reset the subxact context since other
 * data that is needed by upper commits can be in the context.
 *
 * See https://github.com/citusdata/citus/issues/3999
 */
MemoryContext old_context = MemoryContextSwitchTo(TopTransactionContext);

/* save provided subId as well as propagated SET LOCAL stmts */
SubXactContext *state = palloc(sizeof(SubXactContext));

@@ -574,19 +585,34 @@ PushSubXact(SubTransactionId subId)
static void
PopSubXact(SubTransactionId subId)
{
MemoryContext old_context = MemoryContextSwitchTo(CurTransactionContext);
SubXactContext *state = linitial(activeSubXactContexts);

/*
 * the previous activeSetStmts is already invalid because it's in the now-
 * aborted subxact (what we're popping), so no need to free before assigning
 * with the setLocalCmds of the popped context
 */
Assert(state->subId == subId);
activeSetStmts = state->setLocalCmds;
activeSubXactContexts = list_delete_first(activeSubXactContexts);

MemoryContextSwitchTo(old_context);
/*
 * Free activeSetStmts to avoid memory leaks when we create subxacts
 * for each row, e.g. in exception handling of UDFs.
 */
if (activeSetStmts != NULL)
{
pfree(activeSetStmts->data);
pfree(activeSetStmts);
}

/*
 * SET LOCAL commands are local to subxact blocks. When a subxact commits
 * or rolls back, we should roll back our set of SET LOCAL commands to the
 * ones we had in the upper commit.
 */
activeSetStmts = state->setLocalCmds;

/*
 * Free state to avoid memory leaks when we create subxacts for each row,
 * e.g. in exception handling of UDFs.
 */
pfree(state);

activeSubXactContexts = list_delete_first(activeSubXactContexts);
}

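The comment above is the crux of the fix: anything palloc'd in a subtransaction's CurTransactionContext keeps that context non-empty, so AtSubCommit_Memory reparents it instead of deleting it, and state that must outlive the subxact has to live in TopTransactionContext from the start. A minimal sketch of that allocation rule (DurableState and AllocateDurableState are hypothetical names, not from the code above):

#include "postgres.h"
#include "utils/memutils.h"

typedef struct DurableState
{
	int value;
} DurableState;

/*
 * Allocate state that must outlive the current subtransaction. Allocating
 * it in CurTransactionContext instead would pin the subxact's context and
 * leak it on every subcommit, as described above.
 */
static DurableState *
AllocateDurableState(void)
{
	MemoryContext oldContext = MemoryContextSwitchTo(TopTransactionContext);
	DurableState *state = palloc0(sizeof(DurableState));
	MemoryContextSwitchTo(oldContext);

	return state;
}
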
@@ -45,7 +45,12 @@ static void SendCommandToWorkersParamsInternal(TargetWorkerSet targetWorkerSet,
static void ErrorIfAnyMetadataNodeOutOfSync(List *metadataNodeList);
static void SendCommandListToAllWorkersInternal(List *commandList, bool failOnError,
char *superuser);

static List * OpenConnectionsToWorkersInParallel(TargetWorkerSet targetWorkerSet, const
char *user);
static void GetConnectionsResults(List *connectionList, bool failOnError);
static void SendCommandToWorkersOutsideTransaction(TargetWorkerSet targetWorkerSet, const
char *command, const char *user, bool
failOnError);

/*
 * SendCommandToWorker sends a command to a particular worker as part of the

@@ -90,7 +95,7 @@ void
SendCommandToWorkerAsUser(char *nodeName, int32 nodePort, const char *nodeUser,
const char *command)
{
uint connectionFlags = 0;
uint32 connectionFlags = 0;

UseCoordinatedTransaction();
CoordinatedTransactionUse2PC();

@@ -329,6 +334,128 @@ SendCommandToMetadataWorkersParams(const char *command,
}

/*
 * SendCommandToWorkersOptionalInParallel sends the given command to workers in parallel.
 * It errors out if there is a problem while sending the query, but it doesn't error
 * out if there is a problem while executing the query.
 */
void
SendCommandToWorkersOptionalInParallel(TargetWorkerSet targetWorkerSet, const
char *command,
const char *user)
{
bool failOnError = false;
SendCommandToWorkersOutsideTransaction(targetWorkerSet, command, user,
failOnError);
}

/*
 * SendCommandToWorkersInParallel sends the given command to workers in parallel.
 * It errors out if there is any problem while sending the query or while
 * receiving the results.
 */
void
SendCommandToWorkersInParallel(TargetWorkerSet targetWorkerSet, const
char *command,
const char *user)
{
bool failOnError = true;
SendCommandToWorkersOutsideTransaction(targetWorkerSet, command, user,
failOnError);
}

/*
 * SendCommandToWorkersOutsideTransaction sends the given command to workers in parallel.
 */
static void
SendCommandToWorkersOutsideTransaction(TargetWorkerSet targetWorkerSet, const
char *command, const char *user, bool
failOnError)
{
ListCell *connectionCell = NULL;

List *connectionList = OpenConnectionsToWorkersInParallel(targetWorkerSet, user);

/* finish opening connections */
FinishConnectionListEstablishment(connectionList);

/* send commands in parallel */
foreach(connectionCell, connectionList)
{
MultiConnection *connection = (MultiConnection *) lfirst(connectionCell);

int querySent = SendRemoteCommand(connection, command);
if (failOnError && querySent == 0)
{
ReportConnectionError(connection, ERROR);
}
}

GetConnectionsResults(connectionList, failOnError);
}

/*
 * OpenConnectionsToWorkersInParallel opens connections to the given target worker set in parallel,
 * as the given user.
 */
static List *
OpenConnectionsToWorkersInParallel(TargetWorkerSet targetWorkerSet, const char *user)
{
ListCell *workerNodeCell = NULL;
List *connectionList = NIL;

List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);

foreach(workerNodeCell, workerNodeList)
{
WorkerNode *workerNode = (WorkerNode *) lfirst(workerNodeCell);
char *nodeName = workerNode->workerName;
int nodePort = workerNode->workerPort;
int32 connectionFlags = OUTSIDE_TRANSACTION;

MultiConnection *connection = StartNodeUserDatabaseConnection(connectionFlags,
nodeName, nodePort,
user, NULL);
connectionList = lappend(connectionList, connection);
}
return connectionList;
}

/*
 * GetConnectionsResults gets remote command results
 * for the given connections. It raises any error if failOnError is true.
 */
static void
GetConnectionsResults(List *connectionList, bool failOnError)
{
ListCell *connectionCell = NULL;

foreach(connectionCell, connectionList)
{
MultiConnection *connection = (MultiConnection *) lfirst(connectionCell);
bool raiseInterrupt = false;
PGresult *result = GetRemoteCommandResult(connection, raiseInterrupt);

bool isResponseOK = result != NULL && IsResponseOK(result);
if (failOnError && !isResponseOK)
{
ReportResultError(connection, result, ERROR);
}

PQclear(result);

if (isResponseOK)
{
ForgetResults(connection);
}
}
}

/*
 * SendCommandToWorkersParamsInternal sends a command to all workers in parallel.
 * Commands are committed on the workers when the local transaction commits. The

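Put together, the three helpers above form a simple fan-out: open all connections, then issue the command on every connection, then drain the results. A hedged usage sketch of the strict variant — the ALL_WORKERS target and the command string are illustrative assumptions, not taken from this diff:

/*
 * Sketch: run a maintenance command on every worker outside a transaction,
 * erroring out on both send and receive failures. ALL_WORKERS and the
 * command below are examples only.
 */
static void
AnalyzeAllWorkers(void)
{
	SendCommandToWorkersInParallel(ALL_WORKERS,
								   "ANALYZE;",
								   CitusExtensionOwnerName());
}
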
@@ -33,6 +33,8 @@
#include "utils/snapmgr.h"

#include "distributed/citus_acquire_lock.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/connection_management.h"
#include "distributed/version_compat.h"

/* forward declaration of background worker entrypoint */

@@ -41,7 +43,6 @@ extern void LockAcquireHelperMain(Datum main_arg);
/* forward declaration of helper functions */
static void lock_acquire_helper_sigterm(SIGNAL_ARGS);
static void EnsureStopLockAcquireHelper(void *arg);
static long DeadlineTimestampTzToTimeout(TimestampTz deadline);

/* LockAcquireHelperArgs contains extra arguments to be used to start the worker */
typedef struct LockAcquireHelperArgs

@@ -74,27 +75,21 @@ StartLockAcquireHelperBackgroundWorker(int backendToHelp, int32 lock_cooldown)
args.lock_cooldown = lock_cooldown;

/* construct the background worker and start it */
snprintf(worker.bgw_name, BGW_MAXLEN,
"Citus Lock Acquire Helper: %d/%u",
backendToHelp, MyDatabaseId);
snprintf(worker.bgw_type, BGW_MAXLEN, "citus_lock_aqcuire");
SafeSnprintf(worker.bgw_name, sizeof(worker.bgw_name),
"Citus Lock Acquire Helper: %d/%u", backendToHelp, MyDatabaseId);
strcpy_s(worker.bgw_type, sizeof(worker.bgw_type), "citus_lock_aqcuire");

worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
worker.bgw_restart_time = BGW_NEVER_RESTART;

snprintf(worker.bgw_library_name, BGW_MAXLEN, "citus");
snprintf(worker.bgw_function_name, BGW_MAXLEN, "LockAcquireHelperMain");
strcpy_s(worker.bgw_library_name, sizeof(worker.bgw_library_name), "citus");
strcpy_s(worker.bgw_function_name, sizeof(worker.bgw_function_name),
"LockAcquireHelperMain");
worker.bgw_main_arg = Int32GetDatum(backendToHelp);
worker.bgw_notify_pid = 0;

/*
 * we check if args fits in bgw_extra to make sure it is safe to copy the data. Once
 * we exceed the size of data to copy this way we need to look into a different way of
 * passing the arguments to the worker.
 */
Assert(sizeof(worker.bgw_extra) >= sizeof(args));
memcpy(worker.bgw_extra, &args, sizeof(args));
memcpy_s(worker.bgw_extra, sizeof(worker.bgw_extra), &args, sizeof(args));

if (!RegisterDynamicBackgroundWorker(&worker, &handle))
{

@@ -242,8 +237,6 @@ LockAcquireHelperMain(Datum main_arg)

while (ShouldAcquireLock(100))
{
int row = 0;

elog(LOG, "canceling competing backends for backend %d", backendPid);

/*

@@ -261,7 +254,7 @@ LockAcquireHelperMain(Datum main_arg)

if (spiStatus == SPI_OK_SELECT)
{
for (row = 0; row < SPI_processed; row++)
for (uint64 row = 0; row < SPI_processed; row++)
{
bool isnull = false;

@@ -306,18 +299,3 @@ LockAcquireHelperMain(Datum main_arg)
/* safely got to the end, exit without problem */
proc_exit(0);
}

/*
 * DeadlineTimestampTzToTimeout returns the number of milliseconds that still need to
 * elapse before the deadline provided as an argument is reached. The outcome can be
 * passed to the Wait of an EventSet to make sure it returns after the timeout has passed.
 */
static long
DeadlineTimestampTzToTimeout(TimestampTz deadline)
{
long secs = 0;
int msecs = 0;
TimestampDifference(GetCurrentTimestamp(), deadline, &secs, &msecs);
return secs * 1000 + msecs / 1000;
}

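One subtlety in the (now removed) helper above: TimestampDifference reports seconds plus *microseconds*, so the variable named msecs actually holds microseconds and the division by 1000 converts them to milliseconds. A quick worked check of the arithmetic:

/* worked check of the conversion above, 2.5 seconds to the deadline */
static long
ExampleTimeout(void)
{
	long secs = 2;      /* from TimestampDifference */
	int msecs = 500000; /* microseconds, despite the name */

	return secs * 1000 + msecs / 1000; /* 2 * 1000 + 500 == 2500 ms */
}
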
@@ -21,6 +21,7 @@
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "distributed/version_compat.h"
#include "nodes/nodeFuncs.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/datum.h"

@@ -62,6 +63,9 @@ static StypeBox * TryCreateStypeBoxFromFcinfoAggref(FunctionCallInfo fcinfo);
static void HandleTransition(StypeBox *box, FunctionCallInfo fcinfo,
FunctionCallInfo innerFcinfo);
static void HandleStrictUninit(StypeBox *box, FunctionCallInfo fcinfo, Datum value);
static bool TypecheckWorkerPartialAggArgType(FunctionCallInfo fcinfo, StypeBox *box);
static bool TypecheckCoordCombineAggReturnType(FunctionCallInfo fcinfo, Oid ffunc,
StypeBox *box);

/*
 * GetAggregateForm loads corresponding tuple & Form_pg_aggregate for oid

@@ -346,6 +350,12 @@ worker_partial_agg_sfunc(PG_FUNCTION_ARGS)
{
box = pallocInAggContext(fcinfo, sizeof(StypeBox));
box->agg = PG_GETARG_OID(1);

if (!TypecheckWorkerPartialAggArgType(fcinfo, box))
{
ereport(ERROR, (errmsg(
"worker_partial_agg_sfunc could not confirm type correctness")));
}
}
else
{

@@ -617,6 +627,12 @@ coord_combine_agg_ffunc(PG_FUNCTION_ARGS)
bool fextra = aggform->aggfinalextra;
ReleaseSysCache(aggtuple);

if (!TypecheckCoordCombineAggReturnType(fcinfo, ffunc, box))
{
ereport(ERROR, (errmsg(
"coord_combine_agg_ffunc could not confirm type correctness")));
}

if (ffunc == InvalidOid)
{
if (box->valueNull)

@@ -656,3 +672,74 @@ coord_combine_agg_ffunc(PG_FUNCTION_ARGS)
fcinfo->isnull = innerFcinfo->isnull;
return result;
}

/*
 * TypecheckWorkerPartialAggArgType returns whether the arguments being passed to
 * worker_partial_agg match the arguments expected by the aggregate being distributed.
 */
static bool
TypecheckWorkerPartialAggArgType(FunctionCallInfo fcinfo, StypeBox *box)
{
Aggref *aggref = AggGetAggref(fcinfo);
if (aggref == NULL)
{
return false;
}

Assert(list_length(aggref->args) == 2);
TargetEntry *aggarg = list_nth(aggref->args, 1);

bool argtypesNull;
HeapTuple proctuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(box->agg));
if (!HeapTupleIsValid(proctuple))
{
return false;
}

Datum argtypes = SysCacheGetAttr(PROCOID, proctuple,
Anum_pg_proc_proargtypes,
&argtypesNull);
Assert(!argtypesNull);
ReleaseSysCache(proctuple);

if (ARR_NDIM(DatumGetArrayTypeP(argtypes)) != 1 ||
ARR_DIMS(DatumGetArrayTypeP(argtypes))[0] != 1)
{
elog(ERROR, "worker_partial_agg_sfunc cannot type check aggregates "
"taking anything other than 1 argument");
}

int arrayIndex = 0;
Datum argtype = array_get_element(argtypes,
1, &arrayIndex, -1, sizeof(Oid), true, 'i',
&argtypesNull);
Assert(!argtypesNull);

return aggarg != NULL && exprType((Node *) aggarg->expr) == DatumGetObjectId(argtype);
}

/*
 * TypecheckCoordCombineAggReturnType returns whether the return type of the aggregate
 * being distributed by coord_combine_agg matches the null constant used to inform postgres
 * what the aggregate's expected return type is.
 */
static bool
TypecheckCoordCombineAggReturnType(FunctionCallInfo fcinfo, Oid ffunc, StypeBox *box)
{
Aggref *aggref = AggGetAggref(fcinfo);
if (aggref == NULL)
{
return false;
}

Oid finalType = ffunc == InvalidOid ?
box->transtype : get_func_rettype(ffunc);

Assert(list_length(aggref->args) == 3);
TargetEntry *nulltag = list_nth(aggref->args, 2);

return nulltag != NULL && IsA(nulltag->expr, Const) &&
((Const *) nulltag->expr)->consttype == finalType;
}

@@ -45,6 +45,11 @@ static bool ShouldEvaluateFunctionWithMasterContext(MasterEvaluationContext *
bool
RequiresMasterEvaluation(Query *query)
{
if (query->commandType == CMD_SELECT)
{
return false;
}

return FindNodeCheck((Node *) query, CitusIsMutableFunction);
}

@@ -86,6 +86,7 @@ copyJobInfo(Job *newnode, Job *from)
COPY_SCALAR_FIELD(deferredPruning);
COPY_NODE_FIELD(partitionKeyValue);
COPY_NODE_FIELD(localPlannedStatements);
COPY_SCALAR_FIELD(parametersInJobQueryResolved);
}

@@ -274,6 +275,7 @@ CopyNodeTask(COPYFUNC_ARGS)
COPY_NODE_FIELD(relationRowLockList);
COPY_NODE_FIELD(rowValuesLists);
COPY_SCALAR_FIELD(partiallyLocalOrRemote);
COPY_SCALAR_FIELD(parametersInQueryStringResolved);
}

|
|||
CopyNode##type, \
|
||||
EqualUnsupportedCitusNode, \
|
||||
Out##type, \
|
||||
Read##type \
|
||||
ReadUnsupportedCitusNode \
|
||||
}
|
||||
|
||||
#define DEFINE_NODE_METHODS_NO_READ(type) \
|
||||
|
|
|
@ -339,6 +339,7 @@ OutJobFields(StringInfo str, const Job *node)
|
|||
WRITE_BOOL_FIELD(deferredPruning);
|
||||
WRITE_NODE_FIELD(partitionKeyValue);
|
||||
WRITE_NODE_FIELD(localPlannedStatements);
|
||||
WRITE_BOOL_FIELD(parametersInJobQueryResolved);
|
||||
}
|
||||
|
||||
|
||||
|
@ -492,6 +493,7 @@ OutTask(OUTFUNC_ARGS)
|
|||
WRITE_NODE_FIELD(relationRowLockList);
|
||||
WRITE_NODE_FIELD(rowValuesLists);
|
||||
WRITE_BOOL_FIELD(partiallyLocalOrRemote);
|
||||
WRITE_BOOL_FIELD(parametersInQueryStringResolved);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@@ -11,443 +11,10 @@
 */
#include "postgres.h"

#include <math.h>

#include "distributed/citus_nodefuncs.h"
#include "distributed/errormessage.h"
#include "distributed/log_utils.h"
#include "distributed/distributed_planner.h"
#include "distributed/multi_server_executor.h"
#include "nodes/parsenodes.h"
#include "nodes/readfuncs.h"
#include "utils/builtins.h"

/*
 * Macros to simplify reading of different kinds of fields. Use these
 * wherever possible to reduce the chance for silly typos. Note that these
 * hard-wire conventions about the names of the local variables in a Read
 * routine.
 */

/* Macros for declaring appropriate local variables */
/* A few guys need only local_node */
static inline Node *
CitusSetTag(Node *node, int tag)
{
CitusNode *citus_node = (CitusNode *) node;
citus_node->citus_tag = tag;
return node;
}

/* *INDENT-OFF* */
#define READ_LOCALS_NO_FIELDS(nodeTypeName) \
nodeTypeName *local_node = (nodeTypeName *) CitusSetTag((Node *) node, T_##nodeTypeName)

/* And a few guys need only the pg_strtok support fields */
#if PG_VERSION_NUM >= 120000
#define READ_TEMP_LOCALS() \
const char *token; \
int length
#else
#define READ_TEMP_LOCALS() \
char *token; \
int length
#endif

/* ... but most need both */
#define READ_LOCALS(nodeTypeName) \
READ_LOCALS_NO_FIELDS(nodeTypeName); \
READ_TEMP_LOCALS()

/* Read an integer field (anything written as ":fldname %d") */
#define READ_INT_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atoi(token)

/* Read a 64-bit integer field (anything written as ":fldname %d") */
#define READ_INT64_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = (int64) strtoll(token, NULL, 10)

/* Read an unsigned integer field (anything written as ":fldname %u") */
#define READ_UINT_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atoui(token)

/* XXX: CITUS Read a uint64 field (anything written as ":fldname %u") */
#define READ_UINT64_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atoull(token)

/* Read an OID field (don't hard-wire assumption that OID is same as uint) */
#define READ_OID_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atooid(token)

/* Read a char field (ie, one ascii character) */
#define READ_CHAR_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = token[0]

/* Read an enumerated-type field that was written as an integer code */
#define READ_ENUM_FIELD(fldname, enumtype) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = (enumtype) atoi(token)

/* Read a float field */
#define READ_FLOAT_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atof(token)

/* Read a boolean field */
#define READ_BOOL_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = strtobool(token)

/* Read a character-string field */
#define READ_STRING_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = nullable_string(token, length)

/* Read a parse location field (and throw away the value, per notes above) */
#define READ_LOCATION_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
(void) token; /* in case not used elsewhere */ \
local_node->fldname = -1 /* set field to "unknown" */

/* Read a Node field */
#define READ_NODE_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
(void) token; /* in case not used elsewhere */ \
local_node->fldname = nodeRead(NULL, 0)

/* Read an array of enum values (each written as an integer code) */
#define READ_ENUM_ARRAY(fldname, count, enumtype) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* skip ( */ \
{ \
int i = 0; \
for (i = 0; i < count; i++ ) \
{ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname[i] = (enumtype) atoi(token); \
} \
} \
token = pg_strtok(&length); /* skip ) */ \
(void) token

#define READ_INT_ARRAY(fldname, count) READ_ENUM_ARRAY(fldname, count, int32)

/* Routine exit */
#define READ_DONE() \
return;

/*
 * NOTE: use atoi() to read values written with %d, or atoui() to read
 * values written with %u in outfuncs.c. An exception is OID values,
 * for which use atooid(). (As of 7.1, outfuncs.c writes OIDs as %u,
 * but this will probably change in the future.)
 */
#define atoui(x) ((unsigned int) strtoul((x), NULL, 10))

#define atooid(x) ((Oid) strtoul((x), NULL, 10))

/* XXX: Citus */
#define atoull(x) ((uint64) pg_strtouint64((x), NULL, 10))

#define strtobool(x) ((*(x) == 't') ? true : false)

#define nullable_string(token,length) \
((length) == 0 ? NULL : debackslash(token, length))

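The field-reading macros above all follow the same two-token dance: skip the ":fldname" label, then consume the value. As a concrete illustration (hand-expanded here, not taken from the source), READ_INT_FIELD(taskId) inside a reader that declared READ_TEMP_LOCALS() expands to roughly:

/* hand-expansion of READ_INT_FIELD(taskId); token and length come from READ_TEMP_LOCALS() */
token = pg_strtok(&length);   /* skip :taskId */
token = pg_strtok(&length);   /* get field value */
local_node->taskId = atoi(token);
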
static void
|
||||
readJobInfo(Job *local_node)
|
||||
{
|
||||
READ_TEMP_LOCALS();
|
||||
|
||||
CitusSetTag((Node *) local_node, T_Job);
|
||||
|
||||
READ_UINT64_FIELD(jobId);
|
||||
READ_NODE_FIELD(jobQuery);
|
||||
READ_NODE_FIELD(taskList);
|
||||
READ_NODE_FIELD(dependentJobList);
|
||||
READ_BOOL_FIELD(subqueryPushdown);
|
||||
READ_BOOL_FIELD(requiresMasterEvaluation);
|
||||
READ_BOOL_FIELD(deferredPruning);
|
||||
READ_NODE_FIELD(partitionKeyValue);
|
||||
READ_NODE_FIELD(localPlannedStatements);
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadJob(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS_NO_FIELDS(Job);
|
||||
|
||||
readJobInfo(local_node);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadDistributedPlan(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(DistributedPlan);
|
||||
|
||||
READ_UINT64_FIELD(planId);
|
||||
READ_ENUM_FIELD(modLevel, RowModifyLevel);
|
||||
READ_BOOL_FIELD(hasReturning);
|
||||
READ_BOOL_FIELD(routerExecutable);
|
||||
|
||||
READ_NODE_FIELD(workerJob);
|
||||
READ_NODE_FIELD(masterQuery);
|
||||
READ_UINT64_FIELD(queryId);
|
||||
READ_NODE_FIELD(relationIdList);
|
||||
READ_OID_FIELD(targetRelationId);
|
||||
READ_NODE_FIELD(insertSelectQuery);
|
||||
READ_STRING_FIELD(intermediateResultIdPrefix);
|
||||
|
||||
READ_NODE_FIELD(subPlanList);
|
||||
READ_NODE_FIELD(usedSubPlanNodeList);
|
||||
READ_BOOL_FIELD(fastPathRouterPlan);
|
||||
|
||||
READ_NODE_FIELD(planningError);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadDistributedSubPlan(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(DistributedSubPlan);
|
||||
|
||||
READ_UINT_FIELD(subPlanId);
|
||||
READ_NODE_FIELD(plan);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadUsedDistributedSubPlan(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(UsedDistributedSubPlan);
|
||||
|
||||
READ_STRING_FIELD(subPlanId);
|
||||
READ_INT_FIELD(locationMask);
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadShardInterval(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(ShardInterval);
|
||||
|
||||
READ_OID_FIELD(relationId);
|
||||
READ_CHAR_FIELD(storageType);
|
||||
READ_OID_FIELD(valueTypeId);
|
||||
READ_INT_FIELD(valueTypeLen);
|
||||
READ_BOOL_FIELD(valueByVal);
|
||||
READ_BOOL_FIELD(minValueExists);
|
||||
READ_BOOL_FIELD(maxValueExists);
|
||||
|
||||
token = pg_strtok(&length); /* skip :minValue */
|
||||
if (!local_node->minValueExists)
|
||||
token = pg_strtok(&length); /* skip "<>" */
|
||||
else
|
||||
local_node->minValue = readDatum(local_node->valueByVal);
|
||||
|
||||
token = pg_strtok(&length); /* skip :maxValue */
|
||||
if (!local_node->minValueExists)
|
||||
token = pg_strtok(&length); /* skip "<>" */
|
||||
else
|
||||
local_node->maxValue = readDatum(local_node->valueByVal);
|
||||
|
||||
READ_UINT64_FIELD(shardId);
|
||||
READ_INT_FIELD(shardIndex);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadMapMergeJob(READFUNC_ARGS)
|
||||
{
|
||||
|
||||
READ_LOCALS(MapMergeJob);
|
||||
|
||||
readJobInfo(&local_node->job);
|
||||
|
||||
READ_NODE_FIELD(reduceQuery);
|
||||
READ_ENUM_FIELD(partitionType, PartitionType);
|
||||
READ_NODE_FIELD(partitionColumn);
|
||||
READ_UINT_FIELD(partitionCount);
|
||||
READ_INT_FIELD(sortedShardIntervalArrayLength);
|
||||
|
||||
int arrayLength = local_node->sortedShardIntervalArrayLength;
|
||||
|
||||
/* now build & read sortedShardIntervalArray */
|
||||
local_node->sortedShardIntervalArray =
|
||||
(ShardInterval**) palloc(arrayLength * sizeof(ShardInterval *));
|
||||
|
||||
for (int i = 0; i < arrayLength; ++i)
|
||||
{
|
||||
/* can't use READ_NODE_FIELD, no field names */
|
||||
local_node->sortedShardIntervalArray[i] = nodeRead(NULL, 0);
|
||||
}
|
||||
|
||||
READ_NODE_FIELD(mapTaskList);
|
||||
READ_NODE_FIELD(mergeTaskList);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadShardPlacement(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(ShardPlacement);
|
||||
|
||||
READ_UINT64_FIELD(placementId);
|
||||
READ_UINT64_FIELD(shardId);
|
||||
READ_UINT64_FIELD(shardLength);
|
||||
READ_ENUM_FIELD(shardState, ShardState);
|
||||
READ_INT_FIELD(groupId);
|
||||
READ_STRING_FIELD(nodeName);
|
||||
READ_UINT_FIELD(nodePort);
|
||||
READ_UINT_FIELD(nodeId);
|
||||
/* so we can deal with 0 */
|
||||
READ_INT_FIELD(partitionMethod);
|
||||
READ_UINT_FIELD(colocationGroupId);
|
||||
READ_UINT_FIELD(representativeValue);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadGroupShardPlacement(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(GroupShardPlacement);
|
||||
|
||||
READ_UINT64_FIELD(placementId);
|
||||
READ_UINT64_FIELD(shardId);
|
||||
READ_UINT64_FIELD(shardLength);
|
||||
READ_ENUM_FIELD(shardState, ShardState);
|
||||
READ_INT_FIELD(groupId);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadRelationShard(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(RelationShard);
|
||||
|
||||
READ_OID_FIELD(relationId);
|
||||
READ_UINT64_FIELD(shardId);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadRelationRowLock(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(RelationRowLock);
|
||||
|
||||
READ_OID_FIELD(relationId);
|
||||
READ_ENUM_FIELD(rowLockStrength, LockClauseStrength);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadTask(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(Task);
|
||||
|
||||
READ_ENUM_FIELD(taskType, TaskType);
|
||||
READ_UINT64_FIELD(jobId);
|
||||
READ_UINT_FIELD(taskId);
|
||||
READ_NODE_FIELD(queryForLocalExecution);
|
||||
READ_STRING_FIELD(queryStringLazy);
|
||||
READ_OID_FIELD(anchorDistributedTableId);
|
||||
READ_UINT64_FIELD(anchorShardId);
|
||||
READ_NODE_FIELD(taskPlacementList);
|
||||
READ_NODE_FIELD(dependentTaskList);
|
||||
READ_UINT_FIELD(partitionId);
|
||||
READ_UINT_FIELD(upstreamTaskId);
|
||||
READ_NODE_FIELD(shardInterval);
|
||||
READ_BOOL_FIELD(assignmentConstrained);
|
||||
READ_NODE_FIELD(taskExecution);
|
||||
READ_CHAR_FIELD(replicationModel);
|
||||
READ_BOOL_FIELD(modifyWithSubquery);
|
||||
READ_NODE_FIELD(relationShardList);
|
||||
READ_NODE_FIELD(relationRowLockList);
|
||||
READ_NODE_FIELD(rowValuesLists);
|
||||
READ_BOOL_FIELD(partiallyLocalOrRemote);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadLocalPlannedStatement(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(LocalPlannedStatement);
|
||||
|
||||
READ_UINT64_FIELD(shardId);
|
||||
READ_UINT_FIELD(localGroupId);
|
||||
READ_NODE_FIELD(localPlan);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadTaskExecution(READFUNC_ARGS)
|
||||
{
|
||||
ereport(ERROR, (errmsg("unexpected read request for TaskExecution node")));
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
ReadDeferredErrorMessage(READFUNC_ARGS)
|
||||
{
|
||||
READ_LOCALS(DeferredErrorMessage);
|
||||
|
||||
READ_INT_FIELD(code);
|
||||
READ_STRING_FIELD(message);
|
||||
READ_STRING_FIELD(detail);
|
||||
READ_STRING_FIELD(hint);
|
||||
READ_STRING_FIELD(filename);
|
||||
READ_INT_FIELD(linenumber);
|
||||
READ_STRING_FIELD(functionname);
|
||||
|
||||
READ_DONE();
|
||||
}
|
||||
|
||||
|
||||
READFUNC_RET
|
||||
void
|
||||
ReadUnsupportedCitusNode(READFUNC_ARGS)
|
||||
{
|
||||
ereport(ERROR, (errmsg("not implemented")));
|
||||
|
|
|
@@ -0,0 +1,305 @@
/*-------------------------------------------------------------------------
 *
 * safe_lib.c
 *
 * This file contains all SafeXXXX helper functions that we implement to
 * replace missing xxxx_s functions implemented by safestringlib. It also
 * contains a constraint handler for use in both our SafeXXXX functions and
 * safestringlib's xxxx_s functions.
 *
 * Copyright (c) Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "safe_lib.h"

#include <limits.h>

#include "distributed/citus_safe_lib.h"
#include "lib/stringinfo.h"

/*
 * In PG 11 pg_vsnprintf is not exported and compiled in most cases; in that
 * case use the copied one from pg11_snprintf.c
 * NOTE: Whenever removing this section also remove pg11_snprintf.c
 */
#if PG_VERSION_NUM < 120000
extern int pg11_vsnprintf(char *str, size_t count, const char *fmt, va_list args);
#define citus_vsnprintf pg11_vsnprintf
#else
#define citus_vsnprintf pg_vsnprintf
#endif

/*
 * ereport_constraint_handler is a constraint handler that calls ereport. A
 * constraint handler is called whenever an error occurs in any of the
 * safestringlib xxxx_s functions or our SafeXXXX functions.
 *
 * More info on constraint handlers can be found here:
 * https://en.cppreference.com/w/c/error/set_constraint_handler_s
 */
void
ereport_constraint_handler(const char *message,
void *pointer,
errno_t error)
{
if (message && error)
{
ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg(
"Memory constraint error: %s (errno %d)", message, error)));
}
else if (message)
{
ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg(
"Memory constraint error: %s", message)));
}
else if (error)
{
ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg(
"Unknown function failed with memory constraint error (errno %d)",
error)));
}
else
{
ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg(
"Unknown function failed with memory constraint error")));
}
}

/*
 * SafeStringToInt64 converts a string containing a number to an int64. When it
 * fails it calls ereport.
 *
 * The different error cases are inspired by
 * https://stackoverflow.com/a/26083517/2570866
 */
int64
SafeStringToInt64(const char *str)
{
char *endptr;
errno = 0;
long long number = strtoll(str, &endptr, 10);

if (str == endptr)
{
ereport(ERROR, (errmsg("Error parsing %s as int64, no digits found\n", str)));
}
else if ((errno == ERANGE && number == LLONG_MIN) || number < INT64_MIN)
{
ereport(ERROR, (errmsg("Error parsing %s as int64, underflow occurred\n", str)));
}
else if ((errno == ERANGE && number == LLONG_MAX) || number > INT64_MAX)
{
ereport(ERROR, (errmsg("Error parsing %s as int64, overflow occurred\n", str)));
}
else if (errno == EINVAL)
{
ereport(ERROR, (errmsg(
"Error parsing %s as int64, base contains unsupported value\n",
str)));
}
else if (errno != 0 && number == 0)
{
int err = errno;
ereport(ERROR, (errmsg("Error parsing %s as int64, errno %d\n", str, err)));
}
else if (errno == 0 && str && *endptr != '\0')
{
ereport(ERROR, (errmsg(
"Error parsing %s as int64, additional characters remain after int64\n",
str)));
}
return number;
}

/*
 * SafeStringToUint64 converts a string containing a number to a uint64. When it
 * fails it calls ereport.
 *
 * The different error cases are inspired by
 * https://stackoverflow.com/a/26083517/2570866
 */
uint64
SafeStringToUint64(const char *str)
{
char *endptr;
errno = 0;
unsigned long long number = strtoull(str, &endptr, 10);

if (str == endptr)
{
ereport(ERROR, (errmsg("Error parsing %s as uint64, no digits found\n", str)));
}
else if ((errno == ERANGE && number == ULLONG_MAX) || number > UINT64_MAX)
{
ereport(ERROR, (errmsg("Error parsing %s as uint64, overflow occurred\n", str)));
}
else if (errno == EINVAL)
{
ereport(ERROR, (errmsg(
"Error parsing %s as uint64, base contains unsupported value\n",
str)));
}
else if (errno != 0 && number == 0)
{
int err = errno;
ereport(ERROR, (errmsg("Error parsing %s as uint64, errno %d\n", str, err)));
}
else if (errno == 0 && str && *endptr != '\0')
{
ereport(ERROR, (errmsg(
"Error parsing %s as uint64, additional characters remain after uint64\n",
str)));
}
return number;
}

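A short sketch of how these parsers behave at the call site; the inputs are illustrative, and every failing case ends in ereport(ERROR) rather than a return value:

static void
ParseExamples(void)
{
	int64 shardId = SafeStringToInt64("102008");                   /* 102008 */
	uint64 maxValue = SafeStringToUint64("18446744073709551615");  /* UINT64_MAX */

	/* these would all ereport(ERROR) instead of returning:          */
	/* SafeStringToInt64("");       -> no digits found               */
	/* SafeStringToInt64("12abc");  -> additional characters remain  */
	/* SafeStringToUint64("99999999999999999999"); -> overflow       */

	(void) shardId;
	(void) maxValue;
}
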
/*
 * SafeQsort is the non reentrant version of qsort (qsort vs qsort_r), but it
 * does the input checks required for qsort_s:
 * 1. count or size is greater than RSIZE_MAX
 * 2. ptr or comp is a null pointer (unless count is zero)
 * source: https://en.cppreference.com/w/c/algorithm/qsort
 *
 * When it hits these errors it calls the ereport_constraint_handler.
 *
 * NOTE: this function calls pg_qsort instead of stdlib qsort.
 */
void
SafeQsort(void *ptr, rsize_t count, rsize_t size,
int (*comp)(const void *, const void *))
{
if (count > RSIZE_MAX_MEM)
{
ereport_constraint_handler("SafeQsort: count exceeds max",
NULL, ESLEMAX);
}

if (size > RSIZE_MAX_MEM)
{
ereport_constraint_handler("SafeQsort: size exceeds max",
NULL, ESLEMAX);
}
if (size != 0)
{
if (ptr == NULL)
{
ereport_constraint_handler("SafeQsort: ptr is NULL", NULL, ESNULLP);
}
if (comp == NULL)
{
ereport_constraint_handler("SafeQsort: comp is NULL", NULL, ESNULLP);
}
}
pg_qsort(ptr, count, size, comp);
}

/*
 * SafeBsearch is a non reentrant version of bsearch, but it does the
 * input checks required for bsearch_s:
 * 1. count or size is greater than RSIZE_MAX
 * 2. key, ptr or comp is a null pointer (unless count is zero)
 * source: https://en.cppreference.com/w/c/algorithm/bsearch
 *
 * When it hits these errors it calls the ereport_constraint_handler.
 *
 * NOTE: this function calls stdlib bsearch, which the checks above make safe.
 */
void *
SafeBsearch(const void *key, const void *ptr, rsize_t count, rsize_t size,
int (*comp)(const void *, const void *))
{
if (count > RSIZE_MAX_MEM)
{
ereport_constraint_handler("SafeBsearch: count exceeds max",
NULL, ESLEMAX);
}

if (size > RSIZE_MAX_MEM)
{
ereport_constraint_handler("SafeBsearch: size exceeds max",
NULL, ESLEMAX);
}
if (size != 0)
{
if (key == NULL)
{
ereport_constraint_handler("SafeBsearch: key is NULL", NULL, ESNULLP);
}
if (ptr == NULL)
{
ereport_constraint_handler("SafeBsearch: ptr is NULL", NULL, ESNULLP);
}
if (comp == NULL)
{
ereport_constraint_handler("SafeBsearch: comp is NULL", NULL, ESNULLP);
}
}

/*
 * Explanation of IGNORE-BANNED:
 * bsearch is safe to use here since we check the same thing bsearch_s
 * does. We cannot use bsearch_s as a replacement, since it's not available
 * in safestringlib.
 */
return bsearch(key, ptr, count, size, comp); /* IGNORE-BANNED */
}

/*
 * SafeSnprintf is a safer replacement for snprintf, which is needed since
 * safestringlib doesn't implement snprintf_s.
 *
 * The required failure modes of snprintf_s are as follows (in parentheses
 * whether and how this implements them):
 * 1. the conversion specifier %n is present in format (yes, %n is not
 * supported by pg_vsnprintf)
 * 2. any of the arguments corresponding to %s is a null pointer (half, checked
 * in postgres when asserts are enabled)
 * 3. format or buffer is a null pointer (yes, checked by this function)
 * 4. bufsz is zero or greater than RSIZE_MAX (yes, checked by this function)
 * 5. encoding errors occur in any of string and character conversion
 * specifiers (no clue what postgres does in this case)
 * source: https://en.cppreference.com/w/c/io/fprintf
 */
int
SafeSnprintf(char *restrict buffer, rsize_t bufsz, const char *restrict format, ...)
{
/* failure mode 3 */
if (buffer == NULL)
{
ereport_constraint_handler("SafeSnprintf: buffer is NULL", NULL, ESNULLP);
}
if (format == NULL)
{
ereport_constraint_handler("SafeSnprintf: format is NULL", NULL, ESNULLP);
}

/* failure mode 4 */
if (bufsz == 0)
{
ereport_constraint_handler("SafeSnprintf: bufsz is 0",
NULL, ESZEROL);
}

if (bufsz > RSIZE_MAX_STR)
{
ereport_constraint_handler("SafeSnprintf: bufsz exceeds max",
NULL, ESLEMAX);
}

va_list args;

va_start(args, format);
size_t result = citus_vsnprintf(buffer, bufsz, format, args);
va_end(args);
return result;
}

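Usage mirrors plain snprintf, except that contract violations go through the constraint handler (and hence ereport) instead of invoking undefined behavior. An illustrative call:

static void
FormatShardName(void)
{
	char name[64];

	/* formats into a bounded buffer; returns the result length like snprintf */
	int written = SafeSnprintf(name, sizeof(name), "shard %d of %d", 3, 32);

	/* SafeSnprintf(NULL, ...) or bufsz == 0 would go through the constraint handler */
	(void) written;
}
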
@@ -58,6 +58,7 @@ column_name_to_column(PG_FUNCTION_ARGS)
Relation relation = relation_open(relationId, AccessShareLock);

Var *column = BuildDistributionKeyFromColumnName(relation, columnName);
Assert(column != NULL);
char *columnNodeString = nodeToString(column);
text *columnNodeText = cstring_to_text(columnNodeString);

@@ -82,6 +83,7 @@ column_name_to_column_id(PG_FUNCTION_ARGS)
Relation relation = relation_open(distributedTableId, AccessExclusiveLock);

Var *column = BuildDistributionKeyFromColumnName(relation, columnName);
Assert(column != NULL);

relation_close(relation, NoLock);

@@ -37,7 +37,22 @@
#define X509_SUBJECT_COMMON_NAME "CN"

#define POSTGRES_DEFAULT_SSL_CIPHERS "HIGH:MEDIUM:+3DES:!aNULL"
#define CITUS_DEFAULT_SSL_CIPHERS "TLSv1.2+HIGH:!aNULL:!eNULL"
#define CITUS_DEFAULT_SSL_CIPHERS_OLD "TLSv1.2+HIGH:!aNULL:!eNULL"

/*
 * Microsoft approved cipher string.
 * This cipher string implicitly enables only TLSv1.2+, because these ciphers
 * were all added in TLSv1.2. This can be confirmed by running:
 * openssl ciphers -v <below strings concatenated>
 */
#define CITUS_DEFAULT_SSL_CIPHERS "ECDHE-ECDSA-AES128-GCM-SHA256:" \
"ECDHE-ECDSA-AES256-GCM-SHA384:" \
"ECDHE-RSA-AES128-GCM-SHA256:" \
"ECDHE-RSA-AES256-GCM-SHA384:" \
"ECDHE-ECDSA-AES128-SHA256:" \
"ECDHE-ECDSA-AES256-SHA384:" \
"ECDHE-RSA-AES128-SHA256:" \
"ECDHE-RSA-AES256-SHA384"
#define SET_CITUS_SSL_CIPHERS_QUERY \
"ALTER SYSTEM SET ssl_ciphers TO '" CITUS_DEFAULT_SSL_CIPHERS "';"

@@ -191,7 +206,7 @@ ShouldUseAutoSSL(void)
const char *sslmode = NULL;
sslmode = GetConnParam("sslmode");

if (strcmp(sslmode, "require") == 0)
if (sslmode != NULL && strcmp(sslmode, "require") == 0)
{
return true;
}

@@ -392,7 +407,14 @@ StoreCertificate(EVP_PKEY *privateKey, X509 *certificate)


/* Open the private key file and write the private key in PEM format to it */
FILE *privateKeyFile = fopen(privateKeyFilename, "wb");
int privateKeyFileDescriptor = open(privateKeyFilename, O_WRONLY | O_CREAT, 0600);
if (privateKeyFileDescriptor == -1)
{
ereport(ERROR, (errmsg("unable to open private key file '%s' for writing",
privateKeyFilename)));
}

FILE *privateKeyFile = fdopen(privateKeyFileDescriptor, "wb");
if (!privateKeyFile)
{
ereport(ERROR, (errmsg("unable to open private key file '%s' for writing",

@@ -407,8 +429,15 @@ StoreCertificate(EVP_PKEY *privateKey, X509 *certificate)
ereport(ERROR, (errmsg("unable to store private key")));
}

int certificateFileDescriptor = open(certificateFilename, O_WRONLY | O_CREAT, 0600);
if (certificateFileDescriptor == -1)
{
ereport(ERROR, (errmsg("unable to open certificate file '%s' for writing",
certificateFilename)));
}

/* Open the certificate file and write the certificate in the PEM format to it */
FILE *certificateFile = fopen(certificateFilename, "wb");
FILE *certificateFile = fdopen(certificateFileDescriptor, "wb");
if (!certificateFile)
{
ereport(ERROR, (errmsg("unable to open certificate file '%s' for writing",

@@ -15,6 +15,7 @@

#include "utils/lsyscache.h"
#include "lib/stringinfo.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/listutils.h"
#include "nodes/pg_list.h"
#include "utils/memutils.h"

@@ -49,7 +50,7 @@ SortList(List *pointerList, int (*comparisonFunction)(const void *, const void *
}

/* sort the array of pointers using the comparison function */
qsort(array, arraySize, sizeof(void *), comparisonFunction);
SafeQsort(array, arraySize, sizeof(void *), comparisonFunction);

/* convert the sorted array of pointers back to a sorted list */
for (arrayIndex = 0; arrayIndex < arraySize; arrayIndex++)

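Since SortList sorts an array of *pointers*, any comparator handed to it (and through it to SafeQsort) receives pointers to the array slots and must dereference once before comparing. A minimal sketch with a hypothetical int payload:

/* compares two list elements that are assumed to point at ints */
static int
CompareIntPointers(const void *leftElement, const void *rightElement)
{
	const int *leftInt = *((const int **) leftElement);
	const int *rightInt = *((const int **) rightElement);

	if (*leftInt < *rightInt)
	{
		return -1;
	}
	else if (*leftInt > *rightInt)
	{
		return 1;
	}

	return 0;
}

/* usage: sortedList = SortList(intPointerList, CompareIntPointers); */
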
@@ -30,6 +30,7 @@
#include "commands/extension.h"
#include "libpq/pqsignal.h"
#include "catalog/namespace.h"
#include "distributed/citus_safe_lib.h"
#include "distributed/distributed_deadlock_detection.h"
#include "distributed/maintenanced.h"
#include "distributed/master_protocol.h"

@@ -164,9 +165,9 @@ InitializeMaintenanceDaemonBackend(void)

memset(&worker, 0, sizeof(worker));

snprintf(worker.bgw_name, BGW_MAXLEN,
"Citus Maintenance Daemon: %u/%u",
MyDatabaseId, extensionOwner);
SafeSnprintf(worker.bgw_name, sizeof(worker.bgw_name),
"Citus Maintenance Daemon: %u/%u",
MyDatabaseId, extensionOwner);

/* request ability to connect to target database */
worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;

@@ -181,10 +182,14 @@ InitializeMaintenanceDaemonBackend(void)
 * Restart after a bit after errors, but don't bog the system.
 */
worker.bgw_restart_time = 5;
sprintf(worker.bgw_library_name, "citus");
sprintf(worker.bgw_function_name, "CitusMaintenanceDaemonMain");
strcpy_s(worker.bgw_library_name,
sizeof(worker.bgw_library_name), "citus");
strcpy_s(worker.bgw_function_name, sizeof(worker.bgw_function_name),
"CitusMaintenanceDaemonMain");

worker.bgw_main_arg = ObjectIdGetDatum(MyDatabaseId);
memcpy(worker.bgw_extra, &extensionOwner, sizeof(Oid));
memcpy_s(worker.bgw_extra, sizeof(worker.bgw_extra), &extensionOwner,
sizeof(Oid));
worker.bgw_notify_pid = MyProcPid;

if (!RegisterDynamicBackgroundWorker(&worker, &handle))

(File diff suppressed because it is too large.)

@@ -277,10 +277,8 @@ ReplicateShardToNode(ShardInterval *shardInterval, char *nodeName, int nodePort)
CopyShardCommandList(shardInterval, srcNodeName, srcNodePort, includeData);

List *shardPlacementList = ShardPlacementList(shardId);
bool missingWorkerOk = true;
ShardPlacement *targetPlacement = SearchShardPlacementInList(shardPlacementList,
nodeName, nodePort,
missingWorkerOk);
nodeName, nodePort);
char *tableOwner = TableOwner(shardInterval->relationId);

/*

@ -308,7 +308,7 @@ FindShardInterval(Datum partitionColumnValue, DistTableCacheEntry *cacheEntry)
|
|||
* INVALID_SHARD_INDEX is returned). This should only happen if something is
|
||||
* terribly wrong, either metadata tables are corrupted or we have a bug
|
||||
* somewhere. Such as a hash function which returns a value not in the range
|
||||
* of [INT32_MIN, INT32_MAX] can fire this.
|
||||
* of [PG_INT32_MIN, PG_INT32_MAX] can fire this.
|
||||
*/
|
||||
int
|
||||
FindShardIntervalIndex(Datum searchedValue, DistTableCacheEntry *cacheEntry)
|
||||
|
@ -349,20 +349,8 @@ FindShardIntervalIndex(Datum searchedValue, DistTableCacheEntry *cacheEntry)
|
|||
else
|
||||
{
|
||||
int hashedValue = DatumGetInt32(searchedValue);
|
||||
uint64 hashTokenIncrement = HASH_TOKEN_COUNT / shardCount;
|
||||
|
||||
shardIndex = (uint32) (hashedValue - INT32_MIN) / hashTokenIncrement;
|
||||
Assert(shardIndex <= shardCount);
|
||||
|
||||
/*
|
||||
* If the shard count is not power of 2, the range of the last
|
||||
* shard becomes larger than others. For that extra piece of range,
|
||||
* we still need to use the last shard.
|
||||
*/
|
||||
if (shardIndex == shardCount)
|
||||
{
|
||||
shardIndex = shardCount - 1;
|
||||
}
|
||||
shardIndex = CalculateUniformHashRangeIndex(hashedValue, shardCount);
|
||||
}
|
||||
}
|
||||
else if (partitionMethod == DISTRIBUTE_BY_NONE)
|
||||
|
@@ -443,6 +431,48 @@ SearchCachedShardInterval(Datum partitionColumnValue, ShardInterval **shardInter
 }


+/*
+ * CalculateUniformHashRangeIndex returns the index of the hash range in
+ * which hashedValue falls, assuming shardCount uniform hash ranges.
+ *
+ * We use 64-bit integers to avoid overflow issues during arithmetic.
+ *
+ * NOTE: This function is ONLY for hash-distributed tables with uniform
+ * hash ranges.
+ */
+int
+CalculateUniformHashRangeIndex(int hashedValue, int shardCount)
+{
+	int64 hashedValue64 = (int64) hashedValue;
+
+	/* normalize to the 0-UINT32_MAX range */
+	int64 normalizedHashValue = hashedValue64 - PG_INT32_MIN;
+
+	/* size of each hash range */
+	int64 hashRangeSize = HASH_TOKEN_COUNT / shardCount;
+
+	/* index of hash range into which the hash value falls */
+	int shardIndex = (int) (normalizedHashValue / hashRangeSize);
+
+	if (shardIndex < 0 || shardIndex > shardCount)
+	{
+		ereport(ERROR, (errmsg("bug: shard index %d out of bounds", shardIndex)));
+	}
+
+	/*
+	 * If the shard count is not power of 2, the range of the last
+	 * shard becomes larger than others. For that extra piece of range,
+	 * we still need to use the last shard.
+	 */
+	if (shardIndex == shardCount)
+	{
+		shardIndex = shardCount - 1;
+	}
+
+	return shardIndex;
+}
+
+
 /*
  * SingleReplicatedTable checks whether all shards of a distributed table, do not have
  * more than one replica. If even one shard has more than one replica, this function
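
A worked example helps here. In the old code, `hashedValue - INT32_MIN` was evaluated in 32-bit signed arithmetic, which overflows for any non-negative hash value; that is undefined behavior, and it is the overflow crash mentioned in the changelog for builds with certain compile flags. The standalone sketch below reproduces the fixed 64-bit arithmetic; HASH_TOKEN_COUNT is assumed here to be 2^32, matching the normalization to the full uint32 range above:

#include <stdint.h>
#include <stdio.h>

#define HASH_TOKEN_COUNT 4294967296ULL /* assumed: 2^32, the full uint32 range */

/* standalone version of the fixed index computation */
static int
uniform_hash_range_index(int hashedValue, int shardCount)
{
	int64_t normalized = (int64_t) hashedValue - INT32_MIN; /* 0 .. 2^32-1 */
	int64_t rangeSize = (int64_t) (HASH_TOKEN_COUNT / shardCount);
	int shardIndex = (int) (normalized / rangeSize);

	/* non-power-of-2 shard counts leave a remainder range; it belongs
	 * to the last shard */
	if (shardIndex == shardCount)
	{
		shardIndex = shardCount - 1;
	}
	return shardIndex;
}

int
main(void)
{
	/* with 6 shards each range covers 715827882 tokens (2^32/6, rounded
	 * down), so the maximum hash value lands past index 5 and is clamped */
	printf("%d\n", uniform_hash_range_index(INT32_MIN, 6)); /* 0 */
	printf("%d\n", uniform_hash_range_index(0, 6));         /* 3 */
	printf("%d\n", uniform_hash_range_index(INT32_MAX, 6)); /* 5 (clamped) */
	return 0;
}
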
@@ -21,17 +21,7 @@ PG_FUNCTION_INFO_V1(citus_server_id);
 #ifdef HAVE_LIBCURL

 #include <curl/curl.h>
-#ifndef WIN32
 #include <sys/utsname.h>
-#else
-typedef struct utsname
-{
-	char sysname[65];
-	char release[65];
-	char version[65];
-	char machine[65];
-} utsname;
-#endif

 #include "access/xact.h"
 #include "distributed/metadata_cache.h"
@@ -54,9 +44,6 @@ static bool SendHttpPostJsonRequest(const char *url, const char *postFields,
 									long timeoutSeconds,
 									curl_write_callback responseCallback);
 static bool PerformHttpRequest(CURL *curl);
-#ifdef WIN32
-static int uname(struct utsname *buf);
-#endif


 /* WarnIfSyncDNS warns if libcurl is compiled with synchronous DNS. */
@@ -360,103 +347,3 @@ citus_server_id(PG_FUNCTION_ARGS)

 	PG_RETURN_UUID_P((pg_uuid_t *) buf);
 }
-
-
-#ifdef WIN32
-
-/*
- * Inspired by perl5's win32_uname
- * https://github.com/Perl/perl5/blob/69374fe705978962b85217f3eb828a93f836fd8d/win32/win32.c#L2057
- */
-static int
-uname(struct utsname *buf)
-{
-	OSVERSIONINFO ver;
-
-	ver.dwOSVersionInfoSize = sizeof(ver);
-	GetVersionEx(&ver);
-
-	switch (ver.dwPlatformId)
-	{
-		case VER_PLATFORM_WIN32_WINDOWS:
-		{
-			strcpy(buf->sysname, "Windows");
-			break;
-		}
-
-		case VER_PLATFORM_WIN32_NT:
-		{
-			strcpy(buf->sysname, "Windows NT");
-			break;
-		}
-
-		case VER_PLATFORM_WIN32s:
-		{
-			strcpy(buf->sysname, "Win32s");
-			break;
-		}
-
-		default:
-		{
-			strcpy(buf->sysname, "Win32 Unknown");
-			break;
-		}
-	}
-
-	sprintf(buf->release, "%d.%d", ver.dwMajorVersion, ver.dwMinorVersion);
-
-	{
-		SYSTEM_INFO info;
-		char *arch;
-
-		GetSystemInfo(&info);
-		DWORD procarch = info.wProcessorArchitecture;
-
-		switch (procarch)
-		{
-			case PROCESSOR_ARCHITECTURE_INTEL:
-			{
-				arch = "x86";
-				break;
-			}
-
-			case PROCESSOR_ARCHITECTURE_IA64:
-			{
-				arch = "x86";
-				break;
-			}
-
-			case PROCESSOR_ARCHITECTURE_AMD64:
-			{
-				arch = "x86";
-				break;
-			}
-
-			case PROCESSOR_ARCHITECTURE_UNKNOWN:
-			{
-				arch = "x86";
-				break;
-			}
-
-			default:
-			{
-				arch = NULL;
-				break;
-			}
-		}
-
-		if (arch != NULL)
-		{
-			strcpy(buf->machine, arch);
-		}
-		else
-		{
-			sprintf(buf->machine, "unknown(0x%x)", procarch);
-		}
-	}
-
-	return 0;
-}
-
-
-#endif
@@ -30,6 +30,7 @@
 #include <unistd.h>

 #include "commands/dbcommands.h"
+#include "distributed/citus_safe_lib.h"
 #include "distributed/multi_client_executor.h"
 #include "distributed/multi_server_executor.h"
 #include "distributed/task_tracker.h"
@@ -117,10 +118,11 @@ TaskTrackerRegister(void)
 	worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
 	worker.bgw_start_time = BgWorkerStart_ConsistentState;
 	worker.bgw_restart_time = 1;
-	snprintf(worker.bgw_library_name, BGW_MAXLEN, "citus");
-	snprintf(worker.bgw_function_name, BGW_MAXLEN, "TaskTrackerMain");
+	strcpy_s(worker.bgw_library_name, sizeof(worker.bgw_library_name), "citus");
+	strcpy_s(worker.bgw_function_name, sizeof(worker.bgw_function_name),
+			 "TaskTrackerMain");
 	worker.bgw_notify_pid = 0;
-	snprintf(worker.bgw_name, BGW_MAXLEN, "task tracker");
+	strcpy_s(worker.bgw_name, sizeof(worker.bgw_name), "task tracker");

 	RegisterBackgroundWorker(&worker);
 }
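
Note the behavioral difference this change buys: `snprintf(dest, n, "literal")` silently truncates when the source does not fit, while safestringlib's `strcpy_s(dest, destmax, src)` reports a constraint violation, so buffer sizing bugs surface immediately. Using `sizeof(worker.bgw_library_name)` also ties the bound to the actual field rather than the BGW_MAXLEN constant. A small illustration, with checked_strcpy as a hypothetical stand-in for the strcpy_s behavior:

#include <stdio.h>
#include <string.h>

/* hypothetical stand-in for strcpy_s(dest, dmax, src): fail instead of
 * silently truncating the way snprintf would */
static int
checked_strcpy(char *dest, size_t dmax, const char *src)
{
	if (strlen(src) + 1 > dmax)
	{
		return -1; /* would not fit */
	}
	strcpy(dest, src);
	return 0;
}

int
main(void)
{
	char name[8];

	snprintf(name, sizeof(name), "%s", "TaskTrackerMain");
	printf("snprintf kept: '%s'\n", name); /* 'TaskTra' -- silent truncation */

	if (checked_strcpy(name, sizeof(name), "TaskTrackerMain") != 0)
	{
		printf("checked copy rejected the oversized string\n");
	}
	return 0;
}
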
@@ -641,6 +643,10 @@ SchedulableTaskList(HTAB *WorkerTasksHash)

 	/* get all schedulable tasks ordered according to a priority criteria */
 	WorkerTask *schedulableTaskQueue = SchedulableTaskPriorityQueue(WorkerTasksHash);
+	if (schedulableTaskQueue == NULL)
+	{
+		return NIL;
+	}

 	for (uint32 queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++)
 	{
@@ -702,7 +708,7 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash)
 	}

 	/* now order elements in the queue according to our sorting criterion */
-	qsort(priorityQueue, queueSize, WORKER_TASK_SIZE, CompareTasksByTime);
+	SafeQsort(priorityQueue, queueSize, WORKER_TASK_SIZE, CompareTasksByTime);

 	return priorityQueue;
 }
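
SafeQsort is declared in the new citus_safe_lib.h further down this diff; its implementation is not shown here, so the sketch below is only a guess at the shape of such a wrapper: validate the arguments that plain qsort would silently accept, then delegate. The real Citus version lives in safe_lib.c and may differ.

#include <stdio.h>
#include <stdlib.h>

/* hypothetical SafeQsort-style wrapper: reject invalid arguments loudly */
static void
safe_qsort_sketch(void *ptr, size_t count, size_t size,
				  int (*comp)(const void *, const void *))
{
	if (comp == NULL || (ptr == NULL && count > 0))
	{
		fprintf(stderr, "safe_qsort_sketch: invalid arguments\n");
		abort(); /* in Citus this would presumably be an ereport(ERROR, ...) */
	}
	if (count > 0)
	{
		qsort(ptr, count, size, comp);
	}
}

static int
compare_ints(const void *a, const void *b)
{
	int left = *(const int *) a;
	int right = *(const int *) b;
	return (left > right) - (left < right);
}

int
main(void)
{
	int values[] = { 3, 1, 2 };
	safe_qsort_sketch(values, 3, sizeof(int), compare_ints);
	printf("%d %d %d\n", values[0], values[1], values[2]); /* 1 2 3 */
	return 0;
}
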
@@ -210,8 +210,7 @@ CreateRenameCollationStmt(const ObjectAddress *address, char *newName)
 	HeapTuple colltup = SearchSysCache1(COLLOID, collid);
 	if (!HeapTupleIsValid(colltup))
 	{
-		elog(ERROR, "citus cache lookup error");
-		return NULL;
+		ereport(ERROR, (errmsg("citus cache lookup error")));
 	}
 	Form_pg_collation collationForm =
 		(Form_pg_collation) GETSTRUCT(colltup);
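
Two things change above: the unreachable `return NULL` after the error is dropped, and `elog` becomes `ereport`, PostgreSQL's error API for errors that can reach users (it supports error codes, hints, and translation). At ERROR level both perform a non-local exit, which is why the `return NULL` was dead code. A minimal usage sketch; it compiles inside a PostgreSQL extension build, and the errcode shown is illustrative rather than what Citus uses here:

#include "postgres.h"

static void
report_collation_lookup_failure(Oid collid)
{
	ereport(ERROR,
			(errcode(ERRCODE_INTERNAL_ERROR),
			 errmsg("citus cache lookup error for collation %u", collid)));
	/* not reached: ereport(ERROR, ...) longjmps out of this function */
}
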
@@ -226,7 +226,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort, const char *nodeUser,
 	bool copyDone = false;

 	/* create local file to append remote data to */
-	snprintf(filename, MAXPGPATH, "%s", filePath->data);
+	strlcpy(filename, filePath->data, MAXPGPATH);

 	int32 fileDescriptor = BasicOpenFilePerm(filename, fileFlags, fileMode);
 	if (fileDescriptor < 0)
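
`snprintf(filename, MAXPGPATH, "%s", path)` and `strlcpy(filename, path, MAXPGPATH)` produce the same NUL-terminated, bounded copy, but strlcpy skips format-string parsing and is the conventional PostgreSQL idiom (src/port supplies it on platforms that lack it). A portable stand-in showing its contract; the path below is illustrative:

#include <stdio.h>
#include <string.h>

/* stand-in for strlcpy: copies at most dstsize-1 bytes, always
 * NUL-terminates, and returns the full source length */
static size_t
strlcpy_demo(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	if (dstsize > 0)
	{
		size_t copylen = srclen < dstsize - 1 ? srclen : dstsize - 1;
		memcpy(dst, src, copylen);
		dst[copylen] = '\0';
	}
	return srclen; /* callers can detect truncation: srclen >= dstsize */
}

int
main(void)
{
	char filename[16];
	size_t needed = strlcpy_demo(filename, "base/pgsql_job_cache/task_1.dat",
								 sizeof(filename));
	printf("'%s' (truncated: %s)\n", filename,
		   needed >= sizeof(filename) ? "yes" : "no");
	return 0;
}
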
@@ -464,12 +464,11 @@ CreateTaskTable(StringInfo schemaName, StringInfo relationName,
 	Assert(schemaName != NULL);
 	Assert(relationName != NULL);

-	/*
-	 * This new relation doesn't log to WAL, as the table creation and data copy
-	 * statements occur in the same transaction. Still, we want to make the
-	 * relation unlogged once we upgrade to PostgreSQL 9.1.
-	 */
 	RangeVar *relation = makeRangeVar(schemaName->data, relationName->data, -1);

+	/* this table will only exist for the duration of the query, avoid writing to WAL */
+	relation->relpersistence = RELPERSISTENCE_UNLOGGED;
+
 	List *columnDefinitionList = ColumnDefinitionList(columnNameList, columnTypeList);

 	CreateStmt *createStatement = CreateStatement(relation, columnDefinitionList);
@@ -250,7 +250,7 @@ worker_hash_partition_table(PG_FUNCTION_ARGS)
 static ShardInterval **
 SyntheticShardIntervalArrayForShardMinValues(Datum *shardMinValues, int shardCount)
 {
-	Datum nextShardMaxValue = Int32GetDatum(INT32_MAX);
+	Datum nextShardMaxValue = Int32GetDatum(PG_INT32_MAX);
 	ShardInterval **syntheticShardIntervalArray =
 		palloc(sizeof(ShardInterval *) * shardCount);

@@ -722,8 +722,8 @@ CitusRemoveDirectory(const char *filename)
 	struct stat fileStat;
 	int removed = 0;

-	int fileStated = stat(filename, &fileStat);
-	if (fileStated < 0)
+	int statOK = stat(filename, &fileStat);
+	if (statOK < 0)
 	{
 		if (errno == ENOENT)
 		{
@@ -936,7 +936,7 @@ FilterAndPartitionTable(const char *filterQuery,

 	while (SPI_processed > 0)
 	{
-		for (int rowIndex = 0; rowIndex < SPI_processed; rowIndex++)
+		for (uint64 rowIndex = 0; rowIndex < SPI_processed; rowIndex++)
 		{
 			HeapTuple row = SPI_tuptable->vals[rowIndex];
 			TupleDesc rowDescriptor = SPI_tuptable->tupdesc;
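
`SPI_processed` is a `uint64` in the PostgreSQL versions this code targets, so an `int` loop counter is doubly wrong: it cannot represent more than 2^31-1 rows, and mixing it with a `uint64` in the loop condition triggers the usual arithmetic conversions. A two-minute demonstration of that conversion pitfall:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t processed = 5; /* SPI_processed is uint64 in recent PostgreSQL */
	int signedIndex = -1;

	/* the usual arithmetic conversions turn -1 into 18446744073709551615,
	 * so this comparison is false even though -1 < 5 mathematically */
	if (signedIndex < processed)
	{
		printf("signed comparison behaves\n");
	}
	else
	{
		printf("signed index promoted to a huge unsigned value\n");
	}
	return 0;
}
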
@@ -1240,7 +1240,6 @@ HashPartitionId(Datum partitionValue, Oid partitionCollation, const void *contex
 	FmgrInfo *comparisonFunction = hashPartitionContext->comparisonFunction;
 	Datum hashDatum = FunctionCall1Coll(hashFunction, DEFAULT_COLLATION_OID,
 										partitionValue);
-	int32 hashResult = 0;
 	uint32 hashPartitionId = 0;

 	if (hashDatum == 0)
@@ -1250,10 +1249,8 @@ HashPartitionId(Datum partitionValue, Oid partitionCollation, const void *contex

 	if (hashPartitionContext->hasUniformHashDistribution)
 	{
-		uint64 hashTokenIncrement = HASH_TOKEN_COUNT / partitionCount;
-
-		hashResult = DatumGetInt32(hashDatum);
-		hashPartitionId = (uint32) (hashResult - INT32_MIN) / hashTokenIncrement;
+		int hashValue = DatumGetInt32(hashDatum);
+		hashPartitionId = CalculateUniformHashRangeIndex(hashValue, partitionCount);
 	}
 	else
 	{
@@ -29,35 +29,12 @@ extern CitusRTEKind GetRangeTblKind(RangeTblEntry *rte);

 extern void RegisterNodes(void);

-/*
- * Define read functions for citus nodes in a way they're usable across
- * several major versions. That requires some macro-uglyness as 9.6+ is quite
- * different from before.
- */
-
 #define READFUNC_ARGS struct ExtensibleNode *node
-#define READFUNC_RET void

 #define OUTFUNC_ARGS StringInfo str, const struct ExtensibleNode *raw_node
 #define COPYFUNC_ARGS struct ExtensibleNode *target_node, const struct \
 	ExtensibleNode *source_node

-extern READFUNC_RET ReadJob(READFUNC_ARGS);
-extern READFUNC_RET ReadDistributedPlan(READFUNC_ARGS);
-extern READFUNC_RET ReadDistributedSubPlan(READFUNC_ARGS);
-extern READFUNC_RET ReadUsedDistributedSubPlan(READFUNC_ARGS);
-extern READFUNC_RET ReadShardInterval(READFUNC_ARGS);
-extern READFUNC_RET ReadMapMergeJob(READFUNC_ARGS);
-extern READFUNC_RET ReadShardPlacement(READFUNC_ARGS);
-extern READFUNC_RET ReadRelationShard(READFUNC_ARGS);
-extern READFUNC_RET ReadRelationRowLock(READFUNC_ARGS);
-extern READFUNC_RET ReadTask(READFUNC_ARGS);
-extern READFUNC_RET ReadLocalPlannedStatement(READFUNC_ARGS);
-extern READFUNC_RET ReadTaskExecution(READFUNC_ARGS);
-extern READFUNC_RET ReadDeferredErrorMessage(READFUNC_ARGS);
-extern READFUNC_RET ReadGroupShardPlacement(READFUNC_ARGS);
-
-extern READFUNC_RET ReadUnsupportedCitusNode(READFUNC_ARGS);
+extern void ReadUnsupportedCitusNode(READFUNC_ARGS);

 extern void OutJob(OUTFUNC_ARGS);
 extern void OutDistributedPlan(OUTFUNC_ARGS);
@@ -70,7 +70,7 @@ typedef enum CitusNodeTag
 } CitusNodeTag;


-const char** CitusNodeTagNames;
+extern const char** CitusNodeTagNames;


 typedef struct CitusNode
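
Without `extern`, every translation unit that includes this header emits its own tentative definition of `CitusNodeTagNames`. Linkers traditionally merged such "common" symbols, but with `-fno-common` (the default since GCC 10) the duplicate definitions fail at link time. The fix is the standard declare-in-header, define-once pattern, sketched here across two hypothetical files:

/* in the header: a declaration only, no storage allocated */
extern const char **CitusNodeTagNames;

/* in exactly one .c file: the single definition that allocates storage */
const char **CitusNodeTagNames = NULL;
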
@@ -0,0 +1,31 @@
+/*-------------------------------------------------------------------------
+ *
+ * safe_lib.h
+ *
+ * This file contains helper functions to expand on the _s functions from
+ * safestringlib.
+ *
+ * Copyright (c) Citus Data, Inc.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef CITUS_safe_lib_H
+#define CITUS_safe_lib_H
+
+#include "postgres.h"
+
+#include "safe_lib.h"
+
+extern void ereport_constraint_handler(const char *message, void *pointer, errno_t error);
+extern int64 SafeStringToInt64(const char *str);
+extern uint64 SafeStringToUint64(const char *str);
+extern void SafeQsort(void *ptr, rsize_t count, rsize_t size,
+					  int (*comp)(const void *, const void *));
+void * SafeBsearch(const void *key, const void *ptr, rsize_t count, rsize_t size,
+				   int (*comp)(const void *, const void *));
+int SafeSnprintf(char *str, rsize_t count, const char *fmt, ...);
+
+#define memset_struct_0(variable) memset(&variable, 0, sizeof(variable))
+
+#endif
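
A usage sketch for the helpers declared above; the implementations live in safe_lib.c, which this diff does not show, so the error behavior (erroring out on invalid input rather than returning an errno) is an assumption, and the function and variable names below are hypothetical:

/* hypothetical caller inside a Citus backend build */
#include "distributed/citus_safe_lib.h"

typedef struct DemoState
{
	int64 taskId;
	char message[64];
} DemoState;

static void
demo(const char *taskIdString)
{
	DemoState state;

	memset_struct_0(state);                         /* zero the whole struct */
	state.taskId = SafeStringToInt64(taskIdString); /* assumed to error out
													 * on malformed input */
	SafeSnprintf(state.message, sizeof(state.message),
				 "task " INT64_FORMAT " registered", state.taskId);
}
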
@@ -11,6 +11,8 @@
 #ifndef CONNECTION_MANAGMENT_H
 #define CONNECTION_MANAGMENT_H

+#include "postgres.h"
+
 #include "distributed/transaction_management.h"
 #include "distributed/remote_transaction.h"
 #include "lib/ilist.h"
@@ -218,6 +220,7 @@ extern MultiConnection * StartNodeUserDatabaseConnection(uint32 flags,
 														 int32 port,
 														 const char *user,
 														 const char *database);
+extern void CloseAllConnectionsAfterTransaction(void);
 extern void CloseNodeConnectionsAfterTransaction(char *nodeName, int nodePort);
 extern void CloseConnection(MultiConnection *connection);
 extern void ShutdownConnection(MultiConnection *connection);
@@ -21,7 +21,7 @@
 #include "distributed/citus_custom_scan.h"


-extern void RebuildQueryStrings(Query *originalQuery, List *taskList);
+extern void RebuildQueryStrings(Job *workerJob);
 extern bool UpdateRelationToShardNames(Node *node, List *relationShardList);
 extern void SetTaskQuery(Task *task, Query *query);
 extern void SetTaskQueryString(Task *task, char *queryString);
@@ -7,6 +7,10 @@
  *-------------------------------------------------------------------------
  */

+#ifndef CITUS_FUNCTION_UTILS_H
+#define CITUS_FUNCTION_UTILS_H
+
 #include "postgres.h"

 #include "nodes/execnodes.h"
@@ -18,3 +22,5 @@ extern Oid FunctionOidExtended(const char *schemaName, const char *functionName,
 							   argumentCount, bool missingOK);
 extern ReturnSetInfo * FunctionCallGetTupleStore1(PGFunction function, Oid functionId,
 												  Datum argument);
+
+#endif /* CITUS_FUNCTION_UTILS_H */
@@ -164,7 +164,8 @@ extern void CopyShardForeignConstraintCommandListGrouped(ShardInterval *shardInt
 														 List **
 														 referenceTableForeignConstraintList);
 extern ShardPlacement * SearchShardPlacementInList(List *shardPlacementList,
-												   char *nodeName, uint32 nodePort,
-												   bool missingOk);
+												   char *nodeName, uint32 nodePort);
+extern ShardPlacement * ForceSearchShardPlacementInList(List *shardPlacementList,
+														char *nodeName, uint32 nodePort);

 #endif /* MASTER_PROTOCOL_H */
@@ -160,6 +160,7 @@ extern char LookupDistributionMethod(Oid distributionMethodOid);
 extern HTAB * GetWorkerNodeHash(void);
 extern int GetWorkerNodeCount(void);
 extern WorkerNode * LookupNodeByNodeId(uint32 nodeId);
+extern WorkerNode * ForceLookupNodeByNodeId(uint32 nodeId);
 extern WorkerNode * LookupNodeForGroup(int32 groupId);

 /* namespace oids */
@@ -193,6 +194,7 @@ extern Oid DistPlacementGroupidIndexId(void);
 extern Oid DistObjectPrimaryKeyIndexId(void);

 /* type oids */
+extern Oid LookupTypeOid(char *schemaNameSting, char *typeNameString);
 extern Oid CitusCopyFormatTypeId(void);

 /* function oids */
@@ -219,5 +221,4 @@ extern char * CitusExtensionOwnerName(void);
 extern char * CurrentUserName(void);
 extern const char * CurrentDatabaseName(void);

-
 #endif /* METADATA_CACHE_H */
@@ -105,6 +105,7 @@ extern Var * LeftColumnOrNULL(OpExpr *joinClause);
 extern Var * RightColumnOrNULL(OpExpr *joinClause);
 extern Var * PartitionColumn(Oid relationId, uint32 rangeTableId);
 extern Var * DistPartitionKey(Oid relationId);
+extern Var * ForceDistPartitionKey(Oid relationId);
 extern char PartitionMethod(Oid relationId);
 extern char TableReplicationModel(Oid relationId);

@@ -80,9 +80,21 @@ typedef enum
 	AGGREGATE_TOPN_UNION_AGG = 19,
 	AGGREGATE_ANY_VALUE = 20,

+	/* support for github.com/tvondra/tdigest */
+	AGGREGATE_TDIGEST_COMBINE = 21,
+	AGGREGATE_TDIGEST_ADD_DOUBLE = 22,
+	AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLE = 23,
+	AGGREGATE_TDIGEST_PERCENTILE_ADD_DOUBLEARRAY = 24,
+	AGGREGATE_TDIGEST_PERCENTILE_TDIGEST_DOUBLE = 25,
+	AGGREGATE_TDIGEST_PERCENTILE_TDIGEST_DOUBLEARRAY = 26,
+	AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLE = 27,
+	AGGREGATE_TDIGEST_PERCENTILE_OF_ADD_DOUBLEARRAY = 28,
+	AGGREGATE_TDIGEST_PERCENTILE_OF_TDIGEST_DOUBLE = 29,
+	AGGREGATE_TDIGEST_PERCENTILE_OF_TDIGEST_DOUBLEARRAY = 30,
+
 	/* AGGREGATE_CUSTOM must come last */
-	AGGREGATE_CUSTOM_COMBINE = 21,
-	AGGREGATE_CUSTOM_ROW_GATHER = 22,
+	AGGREGATE_CUSTOM_COMBINE = 31,
+	AGGREGATE_CUSTOM_ROW_GATHER = 32,
 } AggregateType;

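
The ten tdigest entries slot in before the custom aggregates, which forces AGGREGATE_CUSTOM_COMBINE and AGGREGATE_CUSTOM_ROW_GATHER to move from 21/22 to 31/32; per the comment, the custom entries must stay last, presumably because per-aggregate metadata (such as aggregate names) is kept in arrays indexed by these dense enum values. A hypothetical miniature of that pattern, not the actual Citus table:

/* miniature of an enum-indexed metadata table; values must stay dense,
 * and the open-ended custom entry must stay last */
typedef enum
{
	DEMO_AGGREGATE_AVG = 0,
	DEMO_AGGREGATE_TDIGEST_COMBINE = 1,
	DEMO_AGGREGATE_CUSTOM = 2 /* must come last */
} DemoAggregateType;

static const char *const DemoAggregateNames[] = {
	[DEMO_AGGREGATE_AVG] = "avg",
	[DEMO_AGGREGATE_TDIGEST_COMBINE] = "tdigest",
	[DEMO_AGGREGATE_CUSTOM] = "custom"
};
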
Some files were not shown because too many files have changed in this diff.