Merge remote-tracking branch 'origin/master' into feature/drop_shards_on_drop_table

pull/326/head
Marco Slot 2016-02-17 22:52:58 +01:00
commit 75a141a7c6
197 changed files with 2316 additions and 2150 deletions

20
.gitattributes vendored
View File

@ -20,3 +20,23 @@ src/test/regress/output/*.source -whitespace
# These files are maintained or generated elsewhere. We take them as is. # These files are maintained or generated elsewhere. We take them as is.
configure -whitespace configure -whitespace
# all C files (implementation and header) use our style...
*.[ch] citus-style
# except these exceptions...
src/backend/distributed/utils/citus_outfuncs.c -citus-style
src/backend/distributed/utils/citus_read.c -citus-style
src/backend/distributed/utils/citus_readfuncs_94.c -citus-style
src/backend/distributed/utils/citus_readfuncs_95.c -citus-style
src/backend/distributed/utils/ruleutils_94.c -citus-style
src/backend/distributed/utils/ruleutils_95.c -citus-style
src/include/distributed/citus_nodes.h -citus-style
src/include/dumputils.h -citus-style
# all csql files use PostgreSQL style...
src/bin/csql/*.[ch] -citus-style
# except these exceptions
src/bin/csql/copy_options.c citus-style
src/bin/csql/stage.[ch] citus-style

View File

@ -1,11 +1,11 @@
# CitusDB toplevel Makefile # Citus toplevel Makefile
citusdb_subdir = . citus_subdir = .
citusdb_top_builddir = . citus_top_builddir = .
# Hint that configure should be run first # Hint that configure should be run first
ifeq (,$(wildcard Makefile.global)) ifeq (,$(wildcard Makefile.global))
$(error ./configure needs to be run before compiling CitusDB) $(error ./configure needs to be run before compiling Citus)
endif endif
include Makefile.global include Makefile.global
@ -20,9 +20,9 @@ install-extension:
install-headers: install-headers:
$(MKDIR_P) '$(includedir_server)/distributed/' $(MKDIR_P) '$(includedir_server)/distributed/'
# generated headers are located in the build directory # generated headers are located in the build directory
$(INSTALL_DATA) src/include/citusdb_config.h '$(includedir_server)/' $(INSTALL_DATA) src/include/citus_config.h '$(includedir_server)/'
# the rest in the source tree # the rest in the source tree
$(INSTALL_DATA) $(citusdb_abs_srcdir)/src/include/distributed/*.h '$(includedir_server)/distributed/' $(INSTALL_DATA) $(citus_abs_srcdir)/src/include/distributed/*.h '$(includedir_server)/distributed/'
clean-extension: clean-extension:
$(MAKE) -C src/backend/distributed/ clean $(MAKE) -C src/backend/distributed/ clean
.PHONY: extension install-extension clean-extension .PHONY: extension install-extension clean-extension
@ -42,6 +42,13 @@ clean-csql:
install: install-csql install: install-csql
clean: clean-csql clean: clean-csql
# apply or check style
reindent:
cd ${citus_abs_top_srcdir} && citus_indent --quiet
check-style:
cd ${citus_abs_top_srcdir} && citus_indent --quiet --check
.PHONY: reindent check-style
# depend on install for now # depend on install for now
check: all install check: all install
$(MAKE) -C src/test/regress check-full $(MAKE) -C src/test/regress check-full

View File

@ -9,40 +9,40 @@
# makefiles, particularly central handling of compilation flags and # makefiles, particularly central handling of compilation flags and
# rules. # rules.
citusdb_abs_srcdir:=@abs_top_srcdir@/${citusdb_subdir} citus_abs_srcdir:=@abs_top_srcdir@/${citus_subdir}
citusdb_abs_top_srcdir:=@abs_top_srcdir@ citus_abs_top_srcdir:=@abs_top_srcdir@
PG_CONFIG:=@PG_CONFIG@ PG_CONFIG:=@PG_CONFIG@
PGXS:=$(shell $(PG_CONFIG) --pgxs) PGXS:=$(shell $(PG_CONFIG) --pgxs)
# Support for VPATH builds (i.e. builds from outside the source tree) # Support for VPATH builds (i.e. builds from outside the source tree)
vpath_build=@vpath_build@ vpath_build=@vpath_build@
ifeq ($(vpath_build),yes) ifeq ($(vpath_build),yes)
VPATH:=$(citusdb_abs_srcdir) VPATH:=$(citus_abs_srcdir)
USE_VPATH:=$(VPATH) USE_VPATH:=$(VPATH)
endif endif
# CitusDB is built using PostgreSQL's pgxs # Citus is built using PostgreSQL's pgxs
USE_PGXS=1 USE_PGXS=1
include $(PGXS) include $(PGXS)
# Remake Makefile.global from Makefile.global.in if the latter # Remake Makefile.global from Makefile.global.in if the latter
# changed. In order to trigger this rule, the including file must # changed. In order to trigger this rule, the including file must
# write `include $(citusdb_top_builddir)/Makefile.global', not some # write `include $(citus_top_builddir)/Makefile.global', not some
# shortcut thereof. This makes it less likely to accidentally run # shortcut thereof. This makes it less likely to accidentally run
# with some outdated Makefile.global. # with some outdated Makefile.global.
# Make internally restarts whenever included Makefiles are # Make internally restarts whenever included Makefiles are
# regenerated. # regenerated.
$(citusdb_top_builddir)/Makefile.global: $(citusdb_top_builddir)/Makefile.global.in @top_srcdir@/configure $(citusdb_top_builddir)/config.status $(citus_top_builddir)/Makefile.global: $(citus_top_builddir)/Makefile.global.in @top_srcdir@/configure $(citus_top_builddir)/config.status
cd @abs_top_builddir@ && ./config.status Makefile.global cd @abs_top_builddir@ && ./config.status Makefile.global
# Ensure configuration is generated by the most recent configure, # Ensure configuration is generated by the most recent configure,
# useful for longer existing build directories. # useful for longer existing build directories.
$(citusdb_top_builddir)/config.status: @top_srcdir@/configure $(citus_top_builddir)/config.status: @top_srcdir@/configure
cd @abs_top_builddir@ && ./config.status --recheck cd @abs_top_builddir@ && ./config.status --recheck
# Regenerate configure if configure.in changed # Regenerate configure if configure.in changed
@top_srcdir@/configure: $(citusdb_abs_srcdir)/configure.in @top_srcdir@/configure: $(citus_abs_srcdir)/configure.in
cd ${citusdb_abs_srcdir} && ./autogen.sh cd ${citus_abs_srcdir} && ./autogen.sh
# If specified via configure, replace the default compiler. Normally # If specified via configure, replace the default compiler. Normally
# we'll build with the one postgres was built with. But it's useful to # we'll build with the one postgres was built with. But it's useful to
@ -54,8 +54,8 @@ endif
# Add options passed to configure or computed therein, to CFLAGS/CPPFLAGS/... # Add options passed to configure or computed therein, to CFLAGS/CPPFLAGS/...
override CFLAGS += @CFLAGS@ @CITUS_CFLAGS@ override CFLAGS += @CFLAGS@ @CITUS_CFLAGS@
override CPPFLAGS := @CPPFLAGS@ -I '${citusdb_abs_top_srcdir}/src/include' $(CPPFLAGS) override CPPFLAGS := @CPPFLAGS@ -I '${citus_abs_top_srcdir}/src/include' $(CPPFLAGS)
override LDFLAGS += @LDFLAGS@ override LDFLAGS += @LDFLAGS@
# optional file with user defined, additional, rules # optional file with user defined, additional, rules
-include ${citusdb_abs_srcdir}/src/Makefile.custom -include ${citus_abs_srcdir}/src/Makefile.custom

View File

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
# #
# autogen.sh converts configure.in to configure and creates # autogen.sh converts configure.in to configure and creates
# citusdb_config.h.in. The resuting resulting files are checked into # citus_config.h.in. The resuting resulting files are checked into
# the SCM, to avoid everyone needing autoconf installed. # the SCM, to avoid everyone needing autoconf installed.
autoreconf -f autoreconf -f

28
configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for CitusDB 5.0. # Generated by GNU Autoconf 2.69 for Citus 5.0.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@ -577,10 +577,10 @@ MFLAGS=
MAKEFLAGS= MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='CitusDB' PACKAGE_NAME='Citus'
PACKAGE_TARNAME='citusdb' PACKAGE_TARNAME='citus'
PACKAGE_VERSION='5.0' PACKAGE_VERSION='5.0'
PACKAGE_STRING='CitusDB 5.0' PACKAGE_STRING='Citus 5.0'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@ -1190,7 +1190,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures CitusDB 5.0 to adapt to many kinds of systems. \`configure' configures Citus 5.0 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1238,7 +1238,7 @@ Fine tuning of the installation directories:
--infodir=DIR info documentation [DATAROOTDIR/info] --infodir=DIR info documentation [DATAROOTDIR/info]
--localedir=DIR locale-dependent data [DATAROOTDIR/locale] --localedir=DIR locale-dependent data [DATAROOTDIR/locale]
--mandir=DIR man documentation [DATAROOTDIR/man] --mandir=DIR man documentation [DATAROOTDIR/man]
--docdir=DIR documentation root [DATAROOTDIR/doc/citusdb] --docdir=DIR documentation root [DATAROOTDIR/doc/citus]
--htmldir=DIR html documentation [DOCDIR] --htmldir=DIR html documentation [DOCDIR]
--dvidir=DIR dvi documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR]
--pdfdir=DIR pdf documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR]
@ -1251,7 +1251,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of CitusDB 5.0:";; short | recursive ) echo "Configuration of Citus 5.0:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@ -1333,7 +1333,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
CitusDB configure 5.0 Citus configure 5.0
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@ -1390,7 +1390,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by CitusDB $as_me 5.0, which was It was created by Citus $as_me 5.0, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@ -1871,7 +1871,7 @@ if test -z "$version_num"; then
fi fi
if test "$version_num" != '9.4' -a "$version_num" != '9.5'; then if test "$version_num" != '9.4' -a "$version_num" != '9.5'; then
as_fn_error $? "CitusDB is not compatible with the detected PostgreSQL version ${version_num}." "$LINENO" 5 as_fn_error $? "Citus is not compatible with the detected PostgreSQL version ${version_num}." "$LINENO" 5
else else
{ $as_echo "$as_me:${as_lineno-$LINENO}: building against PostgreSQL $version_num" >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: building against PostgreSQL $version_num" >&5
$as_echo "$as_me: building against PostgreSQL $version_num" >&6;} $as_echo "$as_me: building against PostgreSQL $version_num" >&6;}
@ -2893,7 +2893,7 @@ CITUS_CFLAGS="$CITUS_CFLAGS"
ac_config_files="$ac_config_files Makefile.global" ac_config_files="$ac_config_files Makefile.global"
ac_config_headers="$ac_config_headers src/include/citusdb_config.h" ac_config_headers="$ac_config_headers src/include/citus_config.h"
cat >confcache <<\_ACEOF cat >confcache <<\_ACEOF
@ -3402,7 +3402,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by CitusDB $as_me 5.0, which was This file was extended by Citus $as_me 5.0, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@ -3464,7 +3464,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
CitusDB config.status 5.0 Citus config.status 5.0
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"
@ -3586,7 +3586,7 @@ for ac_config_target in $ac_config_targets
do do
case $ac_config_target in case $ac_config_target in
"Makefile.global") CONFIG_FILES="$CONFIG_FILES Makefile.global" ;; "Makefile.global") CONFIG_FILES="$CONFIG_FILES Makefile.global" ;;
"src/include/citusdb_config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/include/citusdb_config.h" ;; "src/include/citus_config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/include/citus_config.h" ;;
*) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
esac esac

View File

@ -1,11 +1,11 @@
# CitusDB autoconf input script. # Citus autoconf input script.
# #
# Converted into an actual configure script by autogen.sh. This # Converted into an actual configure script by autogen.sh. This
# conversion only has to be done when configure.in changes. To avoid # conversion only has to be done when configure.in changes. To avoid
# everyone needing autoconf installed, the resulting files are checked # everyone needing autoconf installed, the resulting files are checked
# into the SCM. # into the SCM.
AC_INIT([CitusDB], [5.0], [], [citusdb], []) AC_INIT([Citus], [5.0], [], [citus], [])
AC_COPYRIGHT([Copyright (c) 2012-2015, Citus Data, Inc.]) AC_COPYRIGHT([Copyright (c) 2012-2015, Citus Data, Inc.])
AC_PROG_SED AC_PROG_SED
@ -32,7 +32,7 @@ if test -z "$version_num"; then
fi fi
if test "$version_num" != '9.4' -a "$version_num" != '9.5'; then if test "$version_num" != '9.4' -a "$version_num" != '9.5'; then
AC_MSG_ERROR([CitusDB is not compatible with the detected PostgreSQL version ${version_num}.]) AC_MSG_ERROR([Citus is not compatible with the detected PostgreSQL version ${version_num}.])
else else
AC_MSG_NOTICE([building against PostgreSQL $version_num]) AC_MSG_NOTICE([building against PostgreSQL $version_num])
fi; fi;
@ -96,11 +96,11 @@ CITUSAC_PROG_CC_CFLAGS_OPT([-Wmissing-prototypes])
AC_SUBST(CITUS_CFLAGS, "$CITUS_CFLAGS") AC_SUBST(CITUS_CFLAGS, "$CITUS_CFLAGS")
AC_CONFIG_FILES([Makefile.global]) AC_CONFIG_FILES([Makefile.global])
AC_CONFIG_HEADERS([src/include/citusdb_config.h]) AC_CONFIG_HEADERS([src/include/citus_config.h])
AH_TOP([ AH_TOP([
/* /*
* citusdb_config.h.in is generated by autoconf/autoheader and * citus_config.h.in is generated by autoconf/autoheader and
* converted into citusdb_config.h by configure. Include when code needs to * converted into citus_config.h by configure. Include when code needs to
* depend on determinations made by configure. * depend on determinations made by configure.
* *
* Do not manually edit! * Do not manually edit!

View File

@ -1,8 +1,8 @@
#! /bin/sh #! /bin/sh
# #
# CitusDB copy of PostgreSQL's config/prep_buildtree # Citus copy of PostgreSQL's config/prep_buildtree
# #
# This script prepares a CitusDB build tree for an out-of-tree/VPATH # This script prepares a Citus build tree for an out-of-tree/VPATH
# build. It is intended to be run by the configure script. # build. It is intended to be run by the configure script.
me=`basename $0` me=`basename $0`

View File

@ -10,4 +10,4 @@
/tmp_check* /tmp_check*
# ignore latest install file # ignore latest install file
citusdb--5.0.sql citus--5.0.sql

View File

@ -1,13 +1,13 @@
# Makefile for the CitusDB extension # Makefile for the Citus extension
citusdb_subdir = src/backend/distributed citus_subdir = src/backend/distributed
citusdb_top_builddir = ../../.. citus_top_builddir = ../../..
MODULE_big = citusdb MODULE_big = citus
EXTENSION = citusdb EXTENSION = citus
EXTVERSION = 5.0 EXTVERSION = 5.0
DATA_built = $(EXTENSION)--$(EXTVERSION).sql DATA_built = $(EXTENSION)--$(EXTVERSION).sql
SCRIPTS = $(wildcard $(citusdb_top_builddir)/src/bin/scripts/*) SCRIPTS = $(wildcard $(citus_top_builddir)/src/bin/scripts/*)
# directories with source files # directories with source files
SUBDIRS = . commands executor master planner relay test utils worker SUBDIRS = . commands executor master planner relay test utils worker
@ -15,7 +15,7 @@ SUBDIRS = . commands executor master planner relay test utils worker
# That patsubst rule searches all directories listed in SUBDIRS for .c # That patsubst rule searches all directories listed in SUBDIRS for .c
# files, and adds the corresponding .o files to OBJS # files, and adds the corresponding .o files to OBJS
OBJS += \ OBJS += \
$(patsubst $(citusdb_abs_srcdir)/%.c,%.o,$(foreach dir,$(SUBDIRS), $(wildcard $(citusdb_abs_srcdir)/$(dir)/*.c))) $(patsubst $(citus_abs_srcdir)/%.c,%.o,$(foreach dir,$(SUBDIRS), $(wildcard $(citus_abs_srcdir)/$(dir)/*.c)))
# define build process for latest install file # define build process for latest install file
$(EXTENSION)--$(EXTVERSION).sql: $(EXTENSION).sql $(EXTENSION)--$(EXTVERSION).sql: $(EXTENSION).sql
@ -28,6 +28,6 @@ NO_PGXS = 1
SHLIB_LINK = $(libpq) SHLIB_LINK = $(libpq)
include $(citusdb_top_builddir)/Makefile.global include $(citus_top_builddir)/Makefile.global
override CPPFLAGS += -I$(libpq_srcdir) override CPPFLAGS += -I$(libpq_srcdir)

View File

@ -0,0 +1,6 @@
# Citus extension
comment = 'Citus distributed database'
default_version = '5.0'
module_pathname = '$libdir/citus'
relocatable = false
schema = pg_catalog

View File

@ -1,24 +1,24 @@
/* citusdb.sql */ /* citus.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION -- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION citusdb" to load this file. \quit \echo Use "CREATE EXTENSION citus" to load this file. \quit
CREATE SCHEMA citusdb; CREATE SCHEMA citus;
-- Ensure CREATE EXTENSION is not run against an old citusdb data -- Ensure CREATE EXTENSION is not run against an old citus data
-- directory, we're not compatible (due to the builtin functions/tables) -- directory, we're not compatible (due to the builtin functions/tables)
DO $$ DO $$
BEGIN BEGIN
IF EXISTS(SELECT * FROM pg_proc WHERE proname = 'worker_apply_shard_ddl_command') THEN IF EXISTS(SELECT * FROM pg_proc WHERE proname = 'worker_apply_shard_ddl_command') THEN
RAISE 'cannot install citusdb extension in CitusDB 4 data directory'; RAISE 'cannot install citus extension in Citus 4 data directory';
END IF; END IF;
END; END;
$$; $$;
/***************************************************************************** /*****************************************************************************
* CitusDB data types * Citus data types
*****************************************************************************/ *****************************************************************************/
CREATE TYPE citusdb.distribution_type AS ENUM ( CREATE TYPE citus.distribution_type AS ENUM (
'hash', 'hash',
'range', 'range',
'append' 'append'
@ -26,18 +26,18 @@ CREATE TYPE citusdb.distribution_type AS ENUM (
/***************************************************************************** /*****************************************************************************
* CitusDB tables & corresponding indexes * Citus tables & corresponding indexes
*****************************************************************************/ *****************************************************************************/
CREATE TABLE citusdb.pg_dist_partition( CREATE TABLE citus.pg_dist_partition(
logicalrelid Oid NOT NULL, logicalrelid Oid NOT NULL,
partmethod "char" NOT NULL, partmethod "char" NOT NULL,
partkey text NOT NULL partkey text NOT NULL
); );
CREATE UNIQUE INDEX pg_dist_partition_logical_relid_index CREATE UNIQUE INDEX pg_dist_partition_logical_relid_index
ON citusdb.pg_dist_partition using btree(logicalrelid); ON citus.pg_dist_partition using btree(logicalrelid);
ALTER TABLE citusdb.pg_dist_partition SET SCHEMA pg_catalog; ALTER TABLE citus.pg_dist_partition SET SCHEMA pg_catalog;
CREATE TABLE citusdb.pg_dist_shard( CREATE TABLE citus.pg_dist_shard(
logicalrelid oid NOT NULL, logicalrelid oid NOT NULL,
shardid int8 NOT NULL, shardid int8 NOT NULL,
shardstorage "char" NOT NULL, shardstorage "char" NOT NULL,
@ -46,12 +46,12 @@ CREATE TABLE citusdb.pg_dist_shard(
shardmaxvalue text shardmaxvalue text
); );
CREATE UNIQUE INDEX pg_dist_shard_shardid_index CREATE UNIQUE INDEX pg_dist_shard_shardid_index
ON citusdb.pg_dist_shard using btree(shardid); ON citus.pg_dist_shard using btree(shardid);
CREATE INDEX pg_dist_shard_logical_relid_index CREATE INDEX pg_dist_shard_logical_relid_index
ON citusdb.pg_dist_shard using btree(logicalrelid); ON citus.pg_dist_shard using btree(logicalrelid);
ALTER TABLE citusdb.pg_dist_shard SET SCHEMA pg_catalog; ALTER TABLE citus.pg_dist_shard SET SCHEMA pg_catalog;
CREATE TABLE citusdb.pg_dist_shard_placement( CREATE TABLE citus.pg_dist_shard_placement(
shardid int8 NOT NULL, shardid int8 NOT NULL,
shardstate int4 NOT NULL, shardstate int4 NOT NULL,
shardlength int8 NOT NULL, shardlength int8 NOT NULL,
@ -59,40 +59,40 @@ CREATE TABLE citusdb.pg_dist_shard_placement(
nodeport int8 NOT NULL nodeport int8 NOT NULL
) WITH oids; ) WITH oids;
CREATE UNIQUE INDEX pg_dist_shard_placement_oid_index CREATE UNIQUE INDEX pg_dist_shard_placement_oid_index
ON citusdb.pg_dist_shard_placement using btree(oid); ON citus.pg_dist_shard_placement using btree(oid);
CREATE INDEX pg_dist_shard_placement_shardid_index CREATE INDEX pg_dist_shard_placement_shardid_index
ON citusdb.pg_dist_shard_placement using btree(shardid); ON citus.pg_dist_shard_placement using btree(shardid);
CREATE INDEX pg_dist_shard_placement_nodeid_index CREATE INDEX pg_dist_shard_placement_nodeid_index
ON citusdb.pg_dist_shard_placement using btree(nodename, nodeport); ON citus.pg_dist_shard_placement using btree(nodename, nodeport);
ALTER TABLE citusdb.pg_dist_shard_placement SET SCHEMA pg_catalog; ALTER TABLE citus.pg_dist_shard_placement SET SCHEMA pg_catalog;
/***************************************************************************** /*****************************************************************************
* CitusDB sequences * Citus sequences
*****************************************************************************/ *****************************************************************************/
/* /*
* Internal sequence to generate 64-bit shard ids. These identifiers are then * Internal sequence to generate 64-bit shard ids. These identifiers are then
* used to identify shards in the distributed database. * used to identify shards in the distributed database.
*/ */
CREATE SEQUENCE citusdb.pg_dist_shardid_seq CREATE SEQUENCE citus.pg_dist_shardid_seq
MINVALUE 102008 MINVALUE 102008
NO CYCLE; NO CYCLE;
ALTER SEQUENCE citusdb.pg_dist_shardid_seq SET SCHEMA pg_catalog; ALTER SEQUENCE citus.pg_dist_shardid_seq SET SCHEMA pg_catalog;
/* /*
* internal sequence to generate 32-bit jobIds. These identifiers are then * internal sequence to generate 32-bit jobIds. These identifiers are then
* used to identify jobs in the distributed database; and they wrap at 32-bits * used to identify jobs in the distributed database; and they wrap at 32-bits
* to allow for slave nodes to independently execute their distributed jobs. * to allow for slave nodes to independently execute their distributed jobs.
*/ */
CREATE SEQUENCE citusdb.pg_dist_jobid_seq CREATE SEQUENCE citus.pg_dist_jobid_seq
MINVALUE 2 /* first jobId reserved for clean up jobs */ MINVALUE 2 /* first jobId reserved for clean up jobs */
MAXVALUE 4294967296; MAXVALUE 4294967296;
ALTER SEQUENCE citusdb.pg_dist_jobid_seq SET SCHEMA pg_catalog; ALTER SEQUENCE citus.pg_dist_jobid_seq SET SCHEMA pg_catalog;
/***************************************************************************** /*****************************************************************************
* CitusDB functions * Citus functions
*****************************************************************************/ *****************************************************************************/
/* For backward compatibility and ease of use create functions et al. in pg_catalog */ /* For backward compatibility and ease of use create functions et al. in pg_catalog */
@ -182,13 +182,13 @@ COMMENT ON FUNCTION master_get_round_robin_candidate_nodes(shard_id bigint)
CREATE FUNCTION master_create_distributed_table(table_name regclass, CREATE FUNCTION master_create_distributed_table(table_name regclass,
distribution_column text, distribution_column text,
distribution_method citusdb.distribution_type) distribution_method citus.distribution_type)
RETURNS void RETURNS void
LANGUAGE C STRICT LANGUAGE C STRICT
AS 'MODULE_PATHNAME', $$master_create_distributed_table$$; AS 'MODULE_PATHNAME', $$master_create_distributed_table$$;
COMMENT ON FUNCTION master_create_distributed_table(table_name regclass, COMMENT ON FUNCTION master_create_distributed_table(table_name regclass,
distribution_column text, distribution_column text,
distribution_method citusdb.distribution_type) distribution_method citus.distribution_type)
IS 'define the table distribution functions'; IS 'define the table distribution functions';
-- define shard creation function for hash-partitioned tables -- define shard creation function for hash-partitioned tables
@ -323,7 +323,7 @@ COMMENT ON FUNCTION worker_append_table_to_shard(text, text, text, integer)
/* trigger functions */ /* trigger functions */
CREATE OR REPLACE FUNCTION citusdb_drop_trigger() CREATE OR REPLACE FUNCTION citus_drop_trigger()
RETURNS event_trigger RETURNS event_trigger
LANGUAGE plpgsql LANGUAGE plpgsql
SET search_path = pg_catalog SET search_path = pg_catalog
@ -349,7 +349,7 @@ BEGIN
END LOOP; END LOOP;
END; END;
$cdbdt$; $cdbdt$;
COMMENT ON FUNCTION citusdb_drop_trigger() COMMENT ON FUNCTION citus_drop_trigger()
IS 'perform checks and actions at the end of DROP actions'; IS 'perform checks and actions at the end of DROP actions';
CREATE FUNCTION master_dist_partition_cache_invalidate() CREATE FUNCTION master_dist_partition_cache_invalidate()
@ -369,21 +369,21 @@ COMMENT ON FUNCTION master_dist_shard_cache_invalidate()
/* internal functions, not user accessible */ /* internal functions, not user accessible */
CREATE FUNCTION citusdb_extradata_container(INTERNAL) CREATE FUNCTION citus_extradata_container(INTERNAL)
RETURNS void RETURNS void
LANGUAGE C LANGUAGE C
AS 'MODULE_PATHNAME', $$citusdb_extradata_container$$; AS 'MODULE_PATHNAME', $$citus_extradata_container$$;
COMMENT ON FUNCTION pg_catalog.citusdb_extradata_container(INTERNAL) COMMENT ON FUNCTION pg_catalog.citus_extradata_container(INTERNAL)
IS 'placeholder function to store additional data in postgres node trees'; IS 'placeholder function to store additional data in postgres node trees';
/***************************************************************************** /*****************************************************************************
* CitusDB triggers * Citus triggers
*****************************************************************************/ *****************************************************************************/
CREATE EVENT TRIGGER citusdb_cascade_to_partition CREATE EVENT TRIGGER citus_cascade_to_partition
ON SQL_DROP ON SQL_DROP
EXECUTE PROCEDURE citusdb_drop_trigger(); EXECUTE PROCEDURE citus_drop_trigger();
CREATE TRIGGER dist_partition_cache_invalidate CREATE TRIGGER dist_partition_cache_invalidate
AFTER INSERT OR UPDATE OR DELETE AFTER INSERT OR UPDATE OR DELETE
@ -397,7 +397,7 @@ CREATE TRIGGER dist_shard_cache_invalidate
/***************************************************************************** /*****************************************************************************
* CitusDB aggregates * Citus aggregates
*****************************************************************************/ *****************************************************************************/
CREATE AGGREGATE array_cat_agg(anyarray) (SFUNC = array_cat, STYPE = anyarray); CREATE AGGREGATE array_cat_agg(anyarray) (SFUNC = array_cat, STYPE = anyarray);
COMMENT ON AGGREGATE array_cat_agg(anyarray) COMMENT ON AGGREGATE array_cat_agg(anyarray)

View File

@ -1,6 +0,0 @@
# CitusDB extension
comment = 'CitusDB distributed database'
default_version = '5.0'
module_pathname = '$libdir/citusdb'
relocatable = false
schema = pg_catalog

View File

@ -165,7 +165,7 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
* *
* Similarly, do not allow UNIQUE constraint and/or PRIMARY KEY if it does not * Similarly, do not allow UNIQUE constraint and/or PRIMARY KEY if it does not
* include partition column. This check is important for two reasons. First, * include partition column. This check is important for two reasons. First,
* currently CitusDB does not enforce uniqueness constraint on multiple shards. * currently Citus does not enforce uniqueness constraint on multiple shards.
* Second, INSERT INTO .. ON CONFLICT (i.e., UPSERT) queries can be executed with no * Second, INSERT INTO .. ON CONFLICT (i.e., UPSERT) queries can be executed with no
* further check for constraints. * further check for constraints.
*/ */
@ -191,17 +191,17 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
} }
/* /*
* CitusDB cannot enforce uniqueness constraints with overlapping shards. Thus, * Citus cannot enforce uniqueness constraints with overlapping shards. Thus,
* emit a warning for unique indexes on append partitioned tables. * emit a warning for unique indexes on append partitioned tables.
*/ */
if (distributionMethod == DISTRIBUTE_BY_APPEND) if (distributionMethod == DISTRIBUTE_BY_APPEND)
{ {
ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("table \"%s\" has a unique constraint", errmsg("table \"%s\" has a unique constraint",
distributedRelationName), distributedRelationName),
errdetail("Unique constraints and primary keys on " errdetail("Unique constraints and primary keys on "
"append-partitioned tables cannot be enforced."), "append-partitioned tables cannot be enforced."),
errhint("Consider using hash partitioning."))); errhint("Consider using hash partitioning.")));
} }
attributeCount = indexInfo->ii_NumIndexAttrs; attributeCount = indexInfo->ii_NumIndexAttrs;
@ -262,7 +262,7 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
* necessary for a distributed relation in addition to the preexisting ones * necessary for a distributed relation in addition to the preexisting ones
* for a normal relation. * for a normal relation.
* *
* We create one dependency from the (now distributed) relation to the citusdb * We create one dependency from the (now distributed) relation to the citus
* extension to prevent the extension from being dropped while distributed * extension to prevent the extension from being dropped while distributed
* tables exist. Furthermore a dependency from pg_dist_partition's * tables exist. Furthermore a dependency from pg_dist_partition's
* distribution clause to the underlying columns is created, but it's marked * distribution clause to the underlying columns is created, but it's marked
@ -281,7 +281,7 @@ RecordDistributedRelationDependencies(Oid distributedRelationId, Node *distribut
relationAddr.objectSubId = 0; relationAddr.objectSubId = 0;
citusExtensionAddr.classId = ExtensionRelationId; citusExtensionAddr.classId = ExtensionRelationId;
citusExtensionAddr.objectId = get_extension_oid("citusdb", false); citusExtensionAddr.objectId = get_extension_oid("citus", false);
citusExtensionAddr.objectSubId = 0; citusExtensionAddr.objectSubId = 0;
/* dependency from table entry to extension */ /* dependency from table entry to extension */
@ -294,10 +294,10 @@ RecordDistributedRelationDependencies(Oid distributedRelationId, Node *distribut
/* /*
* LookupDistributionMethod maps the oids of citusdb.distribution_type enum * LookupDistributionMethod maps the oids of citus.distribution_type enum
* values to pg_dist_partition.partmethod values. * values to pg_dist_partition.partmethod values.
* *
* The passed in oid has to belong to a value of citusdb.distribution_type. * The passed in oid has to belong to a value of citus.distribution_type.
*/ */
static char static char
LookupDistributionMethod(Oid distributionMethodOid) LookupDistributionMethod(Oid distributionMethodOid)

View File

@ -136,7 +136,7 @@ static File
FileOpenForTransmit(const char *filename, int fileFlags, int fileMode) FileOpenForTransmit(const char *filename, int fileFlags, int fileMode)
{ {
File fileDesc = -1; File fileDesc = -1;
int fileStated = -1; int fileStated = -1;
struct stat fileStat; struct stat fileStat;
fileStated = stat(filename, &fileStat); fileStated = stat(filename, &fileStat);
@ -145,7 +145,7 @@ FileOpenForTransmit(const char *filename, int fileFlags, int fileMode)
if (S_ISDIR(fileStat.st_mode)) if (S_ISDIR(fileStat.st_mode))
{ {
ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is a directory", filename))); errmsg("\"%s\" is a directory", filename)));
} }
} }
@ -270,18 +270,28 @@ ReceiveCopyData(StringInfo copyData)
switch (messageType) switch (messageType)
{ {
case 'd': /* CopyData */ case 'd': /* CopyData */
{
copyDone = false; copyDone = false;
break; break;
case 'c': /* CopyDone */ }
case 'c': /* CopyDone */
{
copyDone = true; copyDone = true;
break; break;
case 'f': /* CopyFail */ }
case 'f': /* CopyFail */
{
ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
errmsg("COPY data failed: %s", pq_getmsgstring(copyData)))); errmsg("COPY data failed: %s", pq_getmsgstring(copyData))));
break; break;
case 'H': /* Flush */ }
case 'S': /* Sync */
case 'H': /* Flush */
case 'S': /* Sync */
{
/* /*
* Ignore Flush/Sync for the convenience of client libraries (such * Ignore Flush/Sync for the convenience of client libraries (such
* as libpq) that may send those without noticing that the command * as libpq) that may send those without noticing that the command
@ -289,11 +299,15 @@ ReceiveCopyData(StringInfo copyData)
*/ */
copyDone = false; copyDone = false;
break; break;
}
default: default:
{
ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION),
errmsg("unexpected message type 0x%02X during COPY data", errmsg("unexpected message type 0x%02X during COPY data",
messageType))); messageType)));
break; break;
}
} }
return copyDone; return copyDone;

View File

@ -301,7 +301,7 @@ MultiClientCancel(int32 connectionId)
if (cancelSent == 0) if (cancelSent == 0)
{ {
ereport(WARNING, (errmsg("could not issue cancel request"), ereport(WARNING, (errmsg("could not issue cancel request"),
errdetail("Client error: %s", errorBuffer))); errdetail("Client error: %s", errorBuffer)));
canceled = false; canceled = false;
} }
@ -348,7 +348,7 @@ MultiClientResultStatus(int32 connectionId)
} }
else else
{ {
ereport(WARNING, (errmsg("could not consume data from worker node"))); ereport(WARNING, (errmsg("could not consume data from worker node")));
resultStatus = CLIENT_RESULT_UNAVAILABLE; resultStatus = CLIENT_RESULT_UNAVAILABLE;
} }
@ -589,7 +589,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor)
while (receiveLength > 0) while (receiveLength > 0)
{ {
/* received copy data; append these data to file */ /* received copy data; append these data to file */
int appended = -1; int appended = -1;
errno = 0; errno = 0;
appended = write(fileDescriptor, receiveBuffer, receiveLength); appended = write(fileDescriptor, receiveBuffer, receiveLength);
@ -706,7 +706,7 @@ ClientConnectionReady(PGconn *connection, PostgresPollingStatusType pollingStatu
fd_set readFileDescriptorSet; fd_set readFileDescriptorSet;
fd_set writeFileDescriptorSet; fd_set writeFileDescriptorSet;
fd_set exceptionFileDescriptorSet; fd_set exceptionFileDescriptorSet;
struct timeval immediateTimeout = {0, 0}; struct timeval immediateTimeout = { 0, 0 };
int connectionFileDescriptor = PQsocket(connection); int connectionFileDescriptor = PQsocket(connection);
FD_ZERO(&readFileDescriptorSet); FD_ZERO(&readFileDescriptorSet);

View File

@ -157,7 +157,6 @@ multi_ExecutorStart(QueryDesc *queryDesc, int eflags)
queryDesc->plannedstmt = masterSelectPlan; queryDesc->plannedstmt = masterSelectPlan;
eflags |= EXEC_FLAG_CITUS_MASTER_SELECT; eflags |= EXEC_FLAG_CITUS_MASTER_SELECT;
} }
} }
/* if the execution is not done for router executor, drop into standard executor */ /* if the execution is not done for router executor, drop into standard executor */
@ -253,7 +252,7 @@ multi_ExecutorEnd(QueryDesc *queryDesc)
RangeTblEntry *rangeTableEntry = linitial(planStatement->rtable); RangeTblEntry *rangeTableEntry = linitial(planStatement->rtable);
Oid masterTableRelid = rangeTableEntry->relid; Oid masterTableRelid = rangeTableEntry->relid;
ObjectAddress masterTableObject = {InvalidOid, InvalidOid, 0}; ObjectAddress masterTableObject = { InvalidOid, InvalidOid, 0 };
masterTableObject.classId = RelationRelationId; masterTableObject.classId = RelationRelationId;
masterTableObject.objectId = masterTableRelid; masterTableObject.objectId = masterTableRelid;

View File

@ -89,7 +89,7 @@ MultiRealTimeExecute(Job *job)
} }
/* loop around until all tasks complete, one task fails, or user cancels */ /* loop around until all tasks complete, one task fails, or user cancels */
while ( !(allTasksCompleted || taskFailed || QueryCancelPending) ) while (!(allTasksCompleted || taskFailed || QueryCancelPending))
{ {
uint32 taskCount = list_length(taskList); uint32 taskCount = list_length(taskList);
uint32 completedTaskCount = 0; uint32 completedTaskCount = 0;
@ -230,333 +230,338 @@ ManageTaskExecution(Task *task, TaskExecution *taskExecution)
switch (currentStatus) switch (currentStatus)
{ {
case EXEC_TASK_CONNECT_START: case EXEC_TASK_CONNECT_START:
{
int32 connectionId = INVALID_CONNECTION_ID;
char *nodeDatabase = NULL;
/* we use the same database name on the master and worker nodes */
nodeDatabase = get_database_name(MyDatabaseId);
connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase);
connectionIdArray[currentIndex] = connectionId;
/* if valid, poll the connection until the connection is initiated */
if (connectionId != INVALID_CONNECTION_ID)
{ {
taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; int32 connectionId = INVALID_CONNECTION_ID;
taskExecution->connectPollCount = 0; char *nodeDatabase = NULL;
connectAction = CONNECT_ACTION_OPENED;
}
else
{
AdjustStateForFailure(taskExecution);
}
break; /* we use the same database name on the master and worker nodes */
} nodeDatabase = get_database_name(MyDatabaseId);
case EXEC_TASK_CONNECT_POLL: connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase);
{ connectionIdArray[currentIndex] = connectionId;
int32 connectionId = connectionIdArray[currentIndex];
ConnectStatus pollStatus = MultiClientConnectPoll(connectionId);
/* /* if valid, poll the connection until the connection is initiated */
* If the connection is established, we reset the data fetch counter and if (connectionId != INVALID_CONNECTION_ID)
* change our status to data fetching.
*/
if (pollStatus == CLIENT_CONNECTION_READY)
{
taskExecution->dataFetchTaskIndex = -1;
taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP;
}
else if (pollStatus == CLIENT_CONNECTION_BUSY)
{
taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL;
}
else if (pollStatus == CLIENT_CONNECTION_BAD)
{
taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
}
/* now check if we have been trying to connect for too long */
taskExecution->connectPollCount++;
if (pollStatus == CLIENT_CONNECTION_BUSY)
{
uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval;
uint32 currentCount = taskExecution->connectPollCount;
if (currentCount >= maxCount)
{ {
ereport(WARNING, (errmsg("could not establish asynchronous connection " taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL;
"after %u ms", REMOTE_NODE_CONNECT_TIMEOUT))); taskExecution->connectPollCount = 0;
connectAction = CONNECT_ACTION_OPENED;
}
else
{
AdjustStateForFailure(taskExecution);
}
break;
}
case EXEC_TASK_CONNECT_POLL:
{
int32 connectionId = connectionIdArray[currentIndex];
ConnectStatus pollStatus = MultiClientConnectPoll(connectionId);
/*
* If the connection is established, we reset the data fetch counter and
* change our status to data fetching.
*/
if (pollStatus == CLIENT_CONNECTION_READY)
{
taskExecution->dataFetchTaskIndex = -1;
taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP;
}
else if (pollStatus == CLIENT_CONNECTION_BUSY)
{
taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL;
}
else if (pollStatus == CLIENT_CONNECTION_BAD)
{
taskStatusArray[currentIndex] = EXEC_TASK_FAILED; taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
} }
}
break; /* now check if we have been trying to connect for too long */
} taskExecution->connectPollCount++;
if (pollStatus == CLIENT_CONNECTION_BUSY)
case EXEC_TASK_FAILED: {
{ uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval;
/* uint32 currentCount = taskExecution->connectPollCount;
* On task failure, we close the connection. We also reset our execution if (currentCount >= maxCount)
* status assuming that we might fail on all other worker nodes and come {
* back to this failed node. In that case, we will retry the same fetch ereport(WARNING, (errmsg("could not establish asynchronous "
* and compute task(s) on this node again. "connection after %u ms",
*/ REMOTE_NODE_CONNECT_TIMEOUT)));
int32 connectionId = connectionIdArray[currentIndex];
MultiClientDisconnect(connectionId); taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; }
connectAction = CONNECT_ACTION_CLOSED; }
taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_START; break;
}
/* try next worker node */
AdjustStateForFailure(taskExecution); case EXEC_TASK_FAILED:
{
break; /*
} * On task failure, we close the connection. We also reset our execution
* status assuming that we might fail on all other worker nodes and come
case EXEC_FETCH_TASK_LOOP: * back to this failed node. In that case, we will retry the same fetch
{ * and compute task(s) on this node again.
List *dataFetchTaskList = task->dependedTaskList; */
int32 dataFetchTaskCount = list_length(dataFetchTaskList); int32 connectionId = connectionIdArray[currentIndex];
MultiClientDisconnect(connectionId);
/* move to the next data fetch task */ connectionIdArray[currentIndex] = INVALID_CONNECTION_ID;
taskExecution->dataFetchTaskIndex++; connectAction = CONNECT_ACTION_CLOSED;
if (taskExecution->dataFetchTaskIndex < dataFetchTaskCount) taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_START;
{
taskStatusArray[currentIndex] = EXEC_FETCH_TASK_START; /* try next worker node */
} AdjustStateForFailure(taskExecution);
else
{ break;
taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_START; }
}
case EXEC_FETCH_TASK_LOOP:
break; {
} List *dataFetchTaskList = task->dependedTaskList;
int32 dataFetchTaskCount = list_length(dataFetchTaskList);
case EXEC_FETCH_TASK_START:
{ /* move to the next data fetch task */
List *dataFetchTaskList = task->dependedTaskList; taskExecution->dataFetchTaskIndex++;
int32 dataFetchTaskIndex = taskExecution->dataFetchTaskIndex;
Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList, dataFetchTaskIndex); if (taskExecution->dataFetchTaskIndex < dataFetchTaskCount)
{
char *dataFetchQuery = dataFetchTask->queryString; taskStatusArray[currentIndex] = EXEC_FETCH_TASK_START;
int32 connectionId = connectionIdArray[currentIndex]; }
else
bool querySent = MultiClientSendQuery(connectionId, dataFetchQuery); {
if (querySent) taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_START;
{ }
taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING;
} break;
else }
{
taskStatusArray[currentIndex] = EXEC_TASK_FAILED; case EXEC_FETCH_TASK_START:
} {
List *dataFetchTaskList = task->dependedTaskList;
break; int32 dataFetchTaskIndex = taskExecution->dataFetchTaskIndex;
} Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList,
dataFetchTaskIndex);
case EXEC_FETCH_TASK_RUNNING:
{ char *dataFetchQuery = dataFetchTask->queryString;
int32 connectionId = connectionIdArray[currentIndex]; int32 connectionId = connectionIdArray[currentIndex];
ResultStatus resultStatus = MultiClientResultStatus(connectionId);
QueryStatus queryStatus = CLIENT_INVALID_QUERY; bool querySent = MultiClientSendQuery(connectionId, dataFetchQuery);
if (querySent)
/* check if query results are in progress or unavailable */ {
if (resultStatus == CLIENT_RESULT_BUSY) taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING;
{ }
taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; else
break; {
} taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) }
{
taskStatusArray[currentIndex] = EXEC_TASK_FAILED; break;
break; }
}
case EXEC_FETCH_TASK_RUNNING:
Assert(resultStatus == CLIENT_RESULT_READY); {
int32 connectionId = connectionIdArray[currentIndex];
/* ResultStatus resultStatus = MultiClientResultStatus(connectionId);
* If the query executed successfully, loop onto the next data fetch QueryStatus queryStatus = CLIENT_INVALID_QUERY;
* task. Else if the query failed, try data fetching on another node.
*/ /* check if query results are in progress or unavailable */
queryStatus = MultiClientQueryStatus(connectionId); if (resultStatus == CLIENT_RESULT_BUSY)
if (queryStatus == CLIENT_QUERY_DONE) {
{ taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING;
taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; break;
} }
else if (queryStatus == CLIENT_QUERY_FAILED) else if (resultStatus == CLIENT_RESULT_UNAVAILABLE)
{ {
taskStatusArray[currentIndex] = EXEC_TASK_FAILED; taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
} break;
else }
{
ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); Assert(resultStatus == CLIENT_RESULT_READY);
}
/*
break; * If the query executed successfully, loop onto the next data fetch
} * task. Else if the query failed, try data fetching on another node.
*/
case EXEC_COMPUTE_TASK_START: queryStatus = MultiClientQueryStatus(connectionId);
{ if (queryStatus == CLIENT_QUERY_DONE)
int32 connectionId = connectionIdArray[currentIndex]; {
bool querySent = false; taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP;
}
/* construct new query to copy query results to stdout */ else if (queryStatus == CLIENT_QUERY_FAILED)
char *queryString = task->queryString; {
StringInfo computeTaskQuery = makeStringInfo(); taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
if (BinaryMasterCopyFormat) }
{ else
appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY, queryString); {
} ereport(FATAL, (errmsg("invalid query status: %d", queryStatus)));
else }
{
appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT, queryString); break;
} }
querySent = MultiClientSendQuery(connectionId, computeTaskQuery->data); case EXEC_COMPUTE_TASK_START:
if (querySent) {
{ int32 connectionId = connectionIdArray[currentIndex];
taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; bool querySent = false;
}
else /* construct new query to copy query results to stdout */
{ char *queryString = task->queryString;
taskStatusArray[currentIndex] = EXEC_TASK_FAILED; StringInfo computeTaskQuery = makeStringInfo();
} if (BinaryMasterCopyFormat)
{
break; appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY,
} queryString);
}
case EXEC_COMPUTE_TASK_RUNNING: else
{ {
int32 connectionId = connectionIdArray[currentIndex]; appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT,
ResultStatus resultStatus = MultiClientResultStatus(connectionId); queryString);
QueryStatus queryStatus = CLIENT_INVALID_QUERY; }
/* check if query results are in progress or unavailable */ querySent = MultiClientSendQuery(connectionId, computeTaskQuery->data);
if (resultStatus == CLIENT_RESULT_BUSY) if (querySent)
{ {
taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING;
break; }
} else
else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) {
{ taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
taskStatusArray[currentIndex] = EXEC_TASK_FAILED; }
break;
} break;
}
Assert(resultStatus == CLIENT_RESULT_READY);
case EXEC_COMPUTE_TASK_RUNNING:
/* check if our request to copy query results has been acknowledged */ {
queryStatus = MultiClientQueryStatus(connectionId); int32 connectionId = connectionIdArray[currentIndex];
if (queryStatus == CLIENT_QUERY_COPY) ResultStatus resultStatus = MultiClientResultStatus(connectionId);
{ QueryStatus queryStatus = CLIENT_INVALID_QUERY;
StringInfo jobDirectoryName = JobDirectoryName(task->jobId);
StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); /* check if query results are in progress or unavailable */
if (resultStatus == CLIENT_RESULT_BUSY)
char *filename = taskFilename->data; {
int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING;
int fileMode = (S_IRUSR | S_IWUSR); break;
}
int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); else if (resultStatus == CLIENT_RESULT_UNAVAILABLE)
if (fileDescriptor >= 0) {
taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
break;
}
Assert(resultStatus == CLIENT_RESULT_READY);
/* check if our request to copy query results has been acknowledged */
queryStatus = MultiClientQueryStatus(connectionId);
if (queryStatus == CLIENT_QUERY_COPY)
{
StringInfo jobDirectoryName = JobDirectoryName(task->jobId);
StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId);
char *filename = taskFilename->data;
int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY);
int fileMode = (S_IRUSR | S_IWUSR);
int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode);
if (fileDescriptor >= 0)
{
/*
* All files inside the job directory get automatically cleaned
* up on transaction commit or abort.
*/
fileDescriptorArray[currentIndex] = fileDescriptor;
taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING;
}
else
{
ereport(WARNING, (errcode_for_file_access(),
errmsg("could not open file \"%s\": %m",
filename)));
taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
}
}
else if (queryStatus == CLIENT_QUERY_FAILED)
{
taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
}
else
{
ereport(FATAL, (errmsg("invalid query status: %d", queryStatus)));
}
break;
}
case EXEC_COMPUTE_TASK_COPYING:
{
int32 connectionId = connectionIdArray[currentIndex];
int32 fileDesc = fileDescriptorArray[currentIndex];
int closed = -1;
/* copy data from worker node, and write to local file */
CopyStatus copyStatus = MultiClientCopyData(connectionId, fileDesc);
/* if worker node will continue to send more data, keep reading */
if (copyStatus == CLIENT_COPY_MORE)
{ {
/*
* All files inside the job directory get automatically cleaned
* up on transaction commit or abort.
*/
fileDescriptorArray[currentIndex] = fileDescriptor;
taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING; taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING;
} }
else else if (copyStatus == CLIENT_COPY_DONE)
{ {
ereport(WARNING, (errcode_for_file_access(), closed = close(fileDesc);
errmsg("could not open file \"%s\": %m", filename))); fileDescriptorArray[currentIndex] = -1;
if (closed >= 0)
{
taskStatusArray[currentIndex] = EXEC_TASK_DONE;
/* we are done executing; we no longer need the connection */
MultiClientDisconnect(connectionId);
connectionIdArray[currentIndex] = INVALID_CONNECTION_ID;
connectAction = CONNECT_ACTION_CLOSED;
}
else
{
ereport(WARNING, (errcode_for_file_access(),
errmsg("could not close copied file: %m")));
taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
}
}
else if (copyStatus == CLIENT_COPY_FAILED)
{
taskStatusArray[currentIndex] = EXEC_TASK_FAILED; taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
closed = close(fileDesc);
fileDescriptorArray[currentIndex] = -1;
if (closed < 0)
{
ereport(WARNING, (errcode_for_file_access(),
errmsg("could not close copy file: %m")));
}
} }
break;
} }
else if (queryStatus == CLIENT_QUERY_FAILED)
case EXEC_TASK_DONE:
{ {
taskStatusArray[currentIndex] = EXEC_TASK_FAILED; /* we are done with this task's execution */
break;
} }
else
default:
{ {
ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); /* we fatal here to avoid leaking client-side resources */
ereport(FATAL, (errmsg("invalid execution status: %d", currentStatus)));
break;
} }
break;
}
case EXEC_COMPUTE_TASK_COPYING:
{
int32 connectionId = connectionIdArray[currentIndex];
int32 fileDesc = fileDescriptorArray[currentIndex];
int closed = -1;
/* copy data from worker node, and write to local file */
CopyStatus copyStatus = MultiClientCopyData(connectionId, fileDesc);
/* if worker node will continue to send more data, keep reading */
if (copyStatus == CLIENT_COPY_MORE)
{
taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING;
}
else if (copyStatus == CLIENT_COPY_DONE)
{
closed = close(fileDesc);
fileDescriptorArray[currentIndex] = -1;
if (closed >= 0)
{
taskStatusArray[currentIndex] = EXEC_TASK_DONE;
/* we are done executing; we no longer need the connection */
MultiClientDisconnect(connectionId);
connectionIdArray[currentIndex] = INVALID_CONNECTION_ID;
connectAction = CONNECT_ACTION_CLOSED;
}
else
{
ereport(WARNING, (errcode_for_file_access(),
errmsg("could not close copied file: %m")));
taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
}
}
else if (copyStatus == CLIENT_COPY_FAILED)
{
taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
closed = close(fileDesc);
fileDescriptorArray[currentIndex] = -1;
if (closed < 0)
{
ereport(WARNING, (errcode_for_file_access(),
errmsg("could not close copy file: %m")));
}
}
break;
}
case EXEC_TASK_DONE:
{
/* we are done with this task's execution */
break;
}
default:
{
/* we fatal here to avoid leaking client-side resources */
ereport(FATAL, (errmsg("invalid execution status: %d", currentStatus)));
break;
}
} }
return connectAction; return connectAction;

View File

@ -80,6 +80,7 @@ RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task)
queryDesc->estate = executorState; queryDesc->estate = executorState;
#if (PG_VERSION_NUM < 90500) #if (PG_VERSION_NUM < 90500)
/* make sure that upsertQuery is false for versions that UPSERT is not available */ /* make sure that upsertQuery is false for versions that UPSERT is not available */
Assert(task->upsertQuery == false); Assert(task->upsertQuery == false);
#endif #endif
@ -153,7 +154,7 @@ CommutativityRuleToLockMode(CmdType commandType, bool upsertQuery)
static void static void
AcquireExecutorShardLock(Task *task, LOCKMODE lockMode) AcquireExecutorShardLock(Task *task, LOCKMODE lockMode)
{ {
int64 shardId = task->shardId; int64 shardId = task->anchorShardId;
LockShardResource(shardId, lockMode); LockShardResource(shardId, lockMode);
} }
@ -177,14 +178,14 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
if (!ScanDirectionIsForward(direction)) if (!ScanDirectionIsForward(direction))
{ {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("scan directions other than forward scans " errmsg("scan directions other than forward scans "
"are unsupported"))); "are unsupported")));
} }
if (count != 0) if (count != 0)
{ {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("fetching rows from a query using a cursor " errmsg("fetching rows from a query using a cursor "
"is unsupported"))); "is unsupported")));
} }
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
@ -210,7 +211,7 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
else else
{ {
ereport(ERROR, (errmsg("unrecognized operation code: %d", ereport(ERROR, (errmsg("unrecognized operation code: %d",
(int) operation))); (int) operation)));
} }
if (queryDesc->totaltime != NULL) if (queryDesc->totaltime != NULL)
@ -219,9 +220,9 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
} }
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
} }
/* /*
* ExecuteDistributedModify is the main entry point for modifying distributed * ExecuteDistributedModify is the main entry point for modifying distributed
* tables. A distributed modification is successful if any placement of the * tables. A distributed modification is successful if any placement of the
@ -250,7 +251,7 @@ ExecuteDistributedModify(Task *task)
Assert(taskPlacement->shardState == FILE_FINALIZED); Assert(taskPlacement->shardState == FILE_FINALIZED);
connection = GetConnection(nodeName, nodePort); connection = GetOrEstablishConnection(nodeName, nodePort);
if (connection == NULL) if (connection == NULL)
{ {
failedPlacementList = lappend(failedPlacementList, taskPlacement); failedPlacementList = lappend(failedPlacementList, taskPlacement);
@ -383,7 +384,7 @@ ExecuteTaskAndStoreResults(Task *task, TupleDesc tupleDescriptor,
bool queryOK = false; bool queryOK = false;
bool storedOK = false; bool storedOK = false;
PGconn *connection = GetConnection(nodeName, nodePort); PGconn *connection = GetOrEstablishConnection(nodeName, nodePort);
if (connection == NULL) if (connection == NULL)
{ {
continue; continue;
@ -532,9 +533,10 @@ StoreQueryResult(PGconn *connection, TupleDesc tupleDescriptor,
return true; return true;
} }
/* /*
* RouterExecutorFinish cleans up after a distributed execution. * RouterExecutorFinish cleans up after a distributed execution.
*/ */
void void
RouterExecutorFinish(QueryDesc *queryDesc) RouterExecutorFinish(QueryDesc *queryDesc)
{ {

View File

@ -73,7 +73,7 @@ JobExecutorType(MultiPlan *multiPlan)
ereport(WARNING, (errmsg("this query uses more connections than the " ereport(WARNING, (errmsg("this query uses more connections than the "
"configured max_connections limit"), "configured max_connections limit"),
errhint("Consider increasing max_connections or setting " errhint("Consider increasing max_connections or setting "
"citusdb.task_executor_type to " "citus.task_executor_type to "
"\"task-tracker\"."))); "\"task-tracker\".")));
} }
@ -88,7 +88,7 @@ JobExecutorType(MultiPlan *multiPlan)
ereport(WARNING, (errmsg("this query uses more file descriptors than the " ereport(WARNING, (errmsg("this query uses more file descriptors than the "
"configured max_files_per_process limit"), "configured max_files_per_process limit"),
errhint("Consider increasing max_files_per_process or " errhint("Consider increasing max_files_per_process or "
"setting citusdb.task_executor_type to " "setting citus.task_executor_type to "
"\"task-tracker\"."))); "\"task-tracker\".")));
} }
@ -96,7 +96,7 @@ JobExecutorType(MultiPlan *multiPlan)
if (dependedJobCount > 0) if (dependedJobCount > 0)
{ {
ereport(ERROR, (errmsg("cannot use real time executor with repartition jobs"), ereport(ERROR, (errmsg("cannot use real time executor with repartition jobs"),
errhint("Set citusdb.task_executor_type to " errhint("Set citus.task_executor_type to "
"\"task-tracker\"."))); "\"task-tracker\".")));
} }
} }
@ -119,7 +119,7 @@ JobExecutorType(MultiPlan *multiPlan)
if (dependedJobCount > 0) if (dependedJobCount > 0)
{ {
ereport(ERROR, (errmsg("cannot use router executor with repartition jobs"), ereport(ERROR, (errmsg("cannot use router executor with repartition jobs"),
errhint("Set citusdb.task_executor_type to " errhint("Set citus.task_executor_type to "
"\"task-tracker\"."))); "\"task-tracker\".")));
} }
@ -128,7 +128,7 @@ JobExecutorType(MultiPlan *multiPlan)
{ {
ereport(ERROR, (errmsg("cannot use router executor with queries that " ereport(ERROR, (errmsg("cannot use router executor with queries that "
"hit multiple shards"), "hit multiple shards"),
errhint("Set citusdb.task_executor_type to \"real-time\" or " errhint("Set citus.task_executor_type to \"real-time\" or "
"\"task-tracker\"."))); "\"task-tracker\".")));
} }
@ -138,7 +138,7 @@ JobExecutorType(MultiPlan *multiPlan)
if (list_length(workerDependentTaskList) > 0) if (list_length(workerDependentTaskList) > 0)
{ {
ereport(ERROR, (errmsg("cannot use router executor with JOINs"), ereport(ERROR, (errmsg("cannot use router executor with JOINs"),
errhint("Set citusdb.task_executor_type to \"real-time\" or " errhint("Set citus.task_executor_type to \"real-time\" or "
"\"task-tracker\"."))); "\"task-tracker\".")));
} }
@ -146,7 +146,7 @@ JobExecutorType(MultiPlan *multiPlan)
if (masterQuery != NULL && list_length(masterQuery->sortClause) > 0) if (masterQuery != NULL && list_length(masterQuery->sortClause) > 0)
{ {
ereport(ERROR, (errmsg("cannot use router executor with ORDER BY clauses"), ereport(ERROR, (errmsg("cannot use router executor with ORDER BY clauses"),
errhint("Set citusdb.task_executor_type to \"real-time\" or " errhint("Set citus.task_executor_type to \"real-time\" or "
"\"task-tracker\"."))); "\"task-tracker\".")));
} }
@ -158,7 +158,7 @@ JobExecutorType(MultiPlan *multiPlan)
if (masterQueryHasAggregates) if (masterQueryHasAggregates)
{ {
ereport(ERROR, (errmsg("cannot use router executor with aggregates"), ereport(ERROR, (errmsg("cannot use router executor with aggregates"),
errhint("Set citusdb.task_executor_type to \"real-time\" or " errhint("Set citus.task_executor_type to \"real-time\" or "
"\"task-tracker\"."))); "\"task-tracker\".")));
} }
} }
@ -173,7 +173,7 @@ JobExecutorType(MultiPlan *multiPlan)
* Every task requires 2 FDs, one file and one connection. Some FDs are taken by * Every task requires 2 FDs, one file and one connection. Some FDs are taken by
* the VFD pool and there is currently no way to reclaim these before opening a * the VFD pool and there is currently no way to reclaim these before opening a
* connection. We therefore assume some FDs to be reserved for VFDs, based on * connection. We therefore assume some FDs to be reserved for VFDs, based on
* observing a typical size of the pool on a CitusDB master. * observing a typical size of the pool on a Citus master.
*/ */
int int
MaxMasterConnectionCount(void) MaxMasterConnectionCount(void)
@ -303,13 +303,13 @@ AdjustStateForFailure(TaskExecution *taskExecution)
if (taskExecution->currentNodeIndex < maxNodeIndex) if (taskExecution->currentNodeIndex < maxNodeIndex)
{ {
taskExecution->currentNodeIndex++; /* try next worker node */ taskExecution->currentNodeIndex++; /* try next worker node */
} }
else else
{ {
taskExecution->currentNodeIndex = 0; /* go back to the first worker node */ taskExecution->currentNodeIndex = 0; /* go back to the first worker node */
} }
taskExecution->dataFetchTaskIndex = -1; /* reset data fetch counter */ taskExecution->dataFetchTaskIndex = -1; /* reset data fetch counter */
taskExecution->failureCount++; /* record failure */ taskExecution->failureCount++; /* record failure */
} }

View File

@ -1,6 +1,6 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* multi_utility.c * multi_utility.c
* CitusDB utility hook and related functionality. * Citus utility hook and related functionality.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -39,9 +39,9 @@
*/ */
struct DropRelationCallbackState struct DropRelationCallbackState
{ {
char relkind; char relkind;
Oid heapOid; Oid heapOid;
bool concurrent; bool concurrent;
}; };
@ -76,7 +76,7 @@ static void RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid ol
/* /*
* Utility for handling citusdb specific concerns around utility statements. * Utility for handling citus specific concerns around utility statements.
* *
* There's two basic types of concerns here: * There's two basic types of concerns here:
* 1) Intercept utility statements that run after distributed query * 1) Intercept utility statements that run after distributed query
@ -168,32 +168,32 @@ multi_ProcessUtility(Node *parsetree,
/* /*
* Inform the user about potential caveats. * Inform the user about potential caveats.
* *
* To prevent failures in aborted transactions, CitusDBHasBeenLoaded() needs * To prevent failures in aborted transactions, CitusHasBeenLoaded() needs
* to be the second condition. See RelationIdGetRelation() which is called * to be the second condition. See RelationIdGetRelation() which is called
* by CitusDBHasBeenLoaded(). * by CitusHasBeenLoaded().
*/ */
if (IsA(parsetree, CreatedbStmt) && CitusDBHasBeenLoaded()) if (IsA(parsetree, CreatedbStmt) && CitusHasBeenLoaded())
{ {
ereport(NOTICE, (errmsg("CitusDB partially supports CREATE DATABASE for " ereport(NOTICE, (errmsg("Citus partially supports CREATE DATABASE for "
"distributed databases"), "distributed databases"),
errdetail("CitusDB does not propagate CREATE DATABASE " errdetail("Citus does not propagate CREATE DATABASE "
"command to workers"), "command to workers"),
errhint("You can manually create a database and its " errhint("You can manually create a database and its "
"extensions on workers."))); "extensions on workers.")));
} }
else if (IsA(parsetree, CreateSchemaStmt) && CitusDBHasBeenLoaded()) else if (IsA(parsetree, CreateSchemaStmt) && CitusHasBeenLoaded())
{ {
ereport(NOTICE, (errmsg("CitusDB partially supports CREATE SCHEMA " ereport(NOTICE, (errmsg("Citus partially supports CREATE SCHEMA "
"for distributed databases"), "for distributed databases"),
errdetail("schema usage in joins and in some UDFs " errdetail("schema usage in joins and in some UDFs "
"provided by CitusDB are not supported yet"))); "provided by Citus are not supported yet")));
} }
else if (IsA(parsetree, CreateRoleStmt) && CitusDBHasBeenLoaded()) else if (IsA(parsetree, CreateRoleStmt) && CitusHasBeenLoaded())
{ {
ereport(NOTICE, (errmsg("CitusDB does not support CREATE ROLE/USER " ereport(NOTICE, (errmsg("Citus does not support CREATE ROLE/USER "
"for distributed databases"), "for distributed databases"),
errdetail("Multiple roles are currently supported " errdetail("Multiple roles are currently supported "
"only for local tables"))); "only for local tables")));
} }
/* now drop into standard process utility */ /* now drop into standard process utility */
@ -204,7 +204,7 @@ multi_ProcessUtility(Node *parsetree,
/* /*
* WarnIfDropCitusExtension prints a WARNING if dropStatement includes dropping * WarnIfDropCitusExtension prints a WARNING if dropStatement includes dropping
* citusdb extension. * citus extension.
*/ */
static void static void
WarnIfDropCitusExtension(DropStmt *dropStatement) WarnIfDropCitusExtension(DropStmt *dropStatement)
@ -218,8 +218,8 @@ WarnIfDropCitusExtension(DropStmt *dropStatement)
List *objectNameList = lfirst(dropStatementObject); List *objectNameList = lfirst(dropStatementObject);
char *objectName = NameListToString(objectNameList); char *objectName = NameListToString(objectNameList);
/* we're only concerned with the citusdb extension */ /* we're only concerned with the citus extension */
if (strncmp("citusdb", objectName, NAMEDATALEN) == 0) if (strncmp("citus", objectName, NAMEDATALEN) == 0)
{ {
/* /*
* Warn the user about the possibility of invalid cache. Also, see * Warn the user about the possibility of invalid cache. Also, see
@ -296,7 +296,7 @@ VerifyTransmitStmt(CopyStmt *copyStatement)
/* /*
* ProcessCopyStmt handles CitusDB specific concerns for COPY like supporting * ProcessCopyStmt handles Citus specific concerns for COPY like supporting
* COPYing from distributed tables and preventing unsupported actions. * COPYing from distributed tables and preventing unsupported actions.
*/ */
static Node * static Node *
@ -757,7 +757,7 @@ IsAlterTableRenameStmt(RenameStmt *renameStmt)
isAlterTableRenameStmt = true; isAlterTableRenameStmt = true;
} }
#if (PG_VERSION_NUM >=90500) #if (PG_VERSION_NUM >= 90500)
else if (renameStmt->renameType == OBJECT_TABCONSTRAINT) else if (renameStmt->renameType == OBJECT_TABCONSTRAINT)
{ {
isAlterTableRenameStmt = true; isAlterTableRenameStmt = true;
@ -905,8 +905,9 @@ ExecuteCommandOnWorkerShards(Oid relationId, const char *commandString,
} }
else else
{ {
ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT " on " ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT
"node %s:%d", shardId, workerName, workerPort))); " on node %s:%d", shardId, workerName,
workerPort)));
} }
isFirstPlacement = false; isFirstPlacement = false;
@ -988,6 +989,7 @@ AllFinalizedPlacementsAccessible(Oid relationId)
static void static void
RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, void *arg) RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, void *arg)
{ {
/* *INDENT-OFF* */
HeapTuple tuple; HeapTuple tuple;
struct DropRelationCallbackState *state; struct DropRelationCallbackState *state;
char relkind; char relkind;
@ -1022,10 +1024,8 @@ RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, voi
classform = (Form_pg_class) GETSTRUCT(tuple); classform = (Form_pg_class) GETSTRUCT(tuple);
if (classform->relkind != relkind) if (classform->relkind != relkind)
{
ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not an index", rel->relname))); errmsg("\"%s\" is not an index", rel->relname)));
}
/* Allow DROP to either table owner or schema owner */ /* Allow DROP to either table owner or schema owner */
if (!pg_class_ownercheck(relOid, GetUserId()) && if (!pg_class_ownercheck(relOid, GetUserId()) &&
@ -1054,4 +1054,5 @@ RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, voi
if (OidIsValid(state->heapOid)) if (OidIsValid(state->heapOid))
LockRelationOid(state->heapOid, heap_lockmode); LockRelationOid(state->heapOid, heap_lockmode);
} }
/* *INDENT-ON* */
} }

View File

@ -185,7 +185,7 @@ master_create_worker_shards(PG_FUNCTION_ARGS)
LockShardDistributionMetadata(shardId, ExclusiveLock); LockShardDistributionMetadata(shardId, ExclusiveLock);
CreateShardPlacements(shardId, ddlCommandList, workerNodeList, CreateShardPlacements(shardId, ddlCommandList, workerNodeList,
roundRobinNodeIndex, replicationFactor); roundRobinNodeIndex, replicationFactor);
InsertShardRow(distributedTableId, shardId, shardStorageType, InsertShardRow(distributedTableId, shardId, shardStorageType,
minHashTokenText, maxHashTokenText); minHashTokenText, maxHashTokenText);

View File

@ -126,9 +126,9 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
if ((partitionMethod == DISTRIBUTE_BY_HASH) && (deleteCriteria != NULL)) if ((partitionMethod == DISTRIBUTE_BY_HASH) && (deleteCriteria != NULL))
{ {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot delete from distributed table"), errmsg("cannot delete from distributed table"),
errdetail("Delete statements on hash-partitioned tables " errdetail("Delete statements on hash-partitioned tables "
"with where clause is not supported"))); "with where clause is not supported")));
} }
CheckDeleteCriteria(deleteCriteria); CheckDeleteCriteria(deleteCriteria);
@ -149,8 +149,8 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
else else
{ {
deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId, deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId,
shardIntervalList, shardIntervalList,
deleteCriteria); deleteCriteria);
} }
droppedShardCount = DropShards(relationId, schemaName, relationName, droppedShardCount = DropShards(relationId, schemaName, relationName,
@ -222,7 +222,7 @@ DropShards(Oid relationId, char *schemaName, char *relationName,
{ {
List *shardPlacementList = NIL; List *shardPlacementList = NIL;
List *droppedPlacementList = NIL; List *droppedPlacementList = NIL;
List *lingeringPlacementList= NIL; List *lingeringPlacementList = NIL;
ListCell *shardPlacementCell = NULL; ListCell *shardPlacementCell = NULL;
ListCell *droppedPlacementCell = NULL; ListCell *droppedPlacementCell = NULL;
ListCell *lingeringPlacementCell = NULL; ListCell *lingeringPlacementCell = NULL;
@ -251,7 +251,8 @@ DropShards(Oid relationId, char *schemaName, char *relationName,
shardPlacementList = ShardPlacementList(shardId); shardPlacementList = ShardPlacementList(shardId);
foreach(shardPlacementCell, shardPlacementList) foreach(shardPlacementCell, shardPlacementList)
{ {
ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell); ShardPlacement *shardPlacement =
(ShardPlacement *) lfirst(shardPlacementCell);
char *workerName = shardPlacement->nodeName; char *workerName = shardPlacement->nodeName;
uint32 workerPort = shardPlacement->nodePort; uint32 workerPort = shardPlacement->nodePort;
bool dropSuccessful = false; bool dropSuccessful = false;
@ -260,15 +261,18 @@ DropShards(Oid relationId, char *schemaName, char *relationName,
char storageType = shardInterval->storageType; char storageType = shardInterval->storageType;
if (storageType == SHARD_STORAGE_TABLE) if (storageType == SHARD_STORAGE_TABLE)
{ {
appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND, quotedShardName); appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND,
quotedShardName);
} }
else if (storageType == SHARD_STORAGE_COLUMNAR || else if (storageType == SHARD_STORAGE_COLUMNAR ||
storageType == SHARD_STORAGE_FOREIGN) storageType == SHARD_STORAGE_FOREIGN)
{ {
appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND, quotedShardName); appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND,
quotedShardName);
} }
dropSuccessful = ExecuteRemoteCommand(workerName, workerPort, workerDropQuery); dropSuccessful = ExecuteRemoteCommand(workerName, workerPort,
workerDropQuery);
if (dropSuccessful) if (dropSuccessful)
{ {
droppedPlacementList = lappend(droppedPlacementList, shardPlacement); droppedPlacementList = lappend(droppedPlacementList, shardPlacement);
@ -312,12 +316,13 @@ DropShards(Oid relationId, char *schemaName, char *relationName,
if (QueryCancelPending) if (QueryCancelPending)
{ {
ereport(WARNING, (errmsg("cancel requests are ignored during shard deletion"))); ereport(WARNING, (errmsg("cancel requests are ignored during shard "
"deletion")));
QueryCancelPending = false; QueryCancelPending = false;
} }
RESUME_INTERRUPTS(); RESUME_INTERRUPTS();
} }
droppedShardCount = list_length(deletableShardIntervalList); droppedShardCount = list_length(deletableShardIntervalList);
@ -343,7 +348,7 @@ CheckTableCount(Query *deleteQuery)
static void static void
CheckDeleteCriteria(Node *deleteCriteria) CheckDeleteCriteria(Node *deleteCriteria)
{ {
bool simpleOpExpression = true; bool simpleOpExpression = true;
if (deleteCriteria == NULL) if (deleteCriteria == NULL)
{ {
@ -372,7 +377,7 @@ CheckDeleteCriteria(Node *deleteCriteria)
} }
else else
{ {
simpleOpExpression = false; simpleOpExpression = false;
} }
if (!simpleOpExpression) if (!simpleOpExpression)
@ -384,15 +389,15 @@ CheckDeleteCriteria(Node *deleteCriteria)
} }
/* /*
* CheckPartitionColumn checks that the given where clause is based only on the * CheckPartitionColumn checks that the given where clause is based only on the
* partition key of the given relation id. * partition key of the given relation id.
*/ */
static void static void
CheckPartitionColumn(Oid relationId, Node *whereClause) CheckPartitionColumn(Oid relationId, Node *whereClause)
{ {
Var *partitionColumn = PartitionKey(relationId); Var *partitionColumn = PartitionKey(relationId);
ListCell *columnCell = NULL; ListCell *columnCell = NULL;
List *columnList = pull_var_clause_default(whereClause); List *columnList = pull_var_clause_default(whereClause);
foreach(columnCell, columnList) foreach(columnCell, columnList)
@ -418,7 +423,7 @@ CheckPartitionColumn(Oid relationId, Node *whereClause)
*/ */
static List * static List *
ShardsMatchingDeleteCriteria(Oid relationId, List *shardIntervalList, ShardsMatchingDeleteCriteria(Oid relationId, List *shardIntervalList,
Node *deleteCriteria) Node *deleteCriteria)
{ {
List *dropShardIntervalList = NIL; List *dropShardIntervalList = NIL;
List *deleteCriteriaList = NIL; List *deleteCriteriaList = NIL;

View File

@ -219,7 +219,7 @@ ShardLength(uint64 shardId)
if (shardPlacementList == NIL) if (shardPlacementList == NIL)
{ {
ereport(ERROR, (errmsg("could not find length of shard " UINT64_FORMAT, shardId), ereport(ERROR, (errmsg("could not find length of shard " UINT64_FORMAT, shardId),
errdetail("Could not find any shard placements for the shard."))); errdetail("Could not find any shard placements for the shard.")));
} }
else else
{ {

View File

@ -49,7 +49,7 @@
/* Shard related configuration */ /* Shard related configuration */
int ShardReplicationFactor = 2; /* desired replication factor for shards */ int ShardReplicationFactor = 2; /* desired replication factor for shards */
int ShardMaxSize = 1048576; /* maximum size in KB one shard can grow to */ int ShardMaxSize = 1048576; /* maximum size in KB one shard can grow to */
int ShardPlacementPolicy = SHARD_PLACEMENT_ROUND_ROBIN; int ShardPlacementPolicy = SHARD_PLACEMENT_ROUND_ROBIN;
@ -210,7 +210,7 @@ master_get_table_ddl_events(PG_FUNCTION_ARGS)
tableDDLEventCell = list_head(tableDDLEventList); tableDDLEventCell = list_head(tableDDLEventList);
functionContext->user_fctx = tableDDLEventCell; functionContext->user_fctx = tableDDLEventCell;
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
} }
@ -226,8 +226,8 @@ master_get_table_ddl_events(PG_FUNCTION_ARGS)
if (tableDDLEventCell != NULL) if (tableDDLEventCell != NULL)
{ {
char *ddlStatement = (char *) lfirst(tableDDLEventCell); char *ddlStatement = (char *) lfirst(tableDDLEventCell);
text *ddlStatementText = cstring_to_text(ddlStatement); text *ddlStatementText = cstring_to_text(ddlStatement);
functionContext->user_fctx = lnext(tableDDLEventCell); functionContext->user_fctx = lnext(tableDDLEventCell);
SRF_RETURN_NEXT(functionContext, PointerGetDatum(ddlStatementText)); SRF_RETURN_NEXT(functionContext, PointerGetDatum(ddlStatementText));
@ -252,7 +252,7 @@ Datum
master_get_new_shardid(PG_FUNCTION_ARGS) master_get_new_shardid(PG_FUNCTION_ARGS)
{ {
text *sequenceName = cstring_to_text(SHARDID_SEQUENCE_NAME); text *sequenceName = cstring_to_text(SHARDID_SEQUENCE_NAME);
Oid sequenceId = ResolveRelationId(sequenceName); Oid sequenceId = ResolveRelationId(sequenceName);
Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId); Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
/* generate new and unique shardId from sequence */ /* generate new and unique shardId from sequence */
@ -281,7 +281,7 @@ master_get_local_first_candidate_nodes(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL()) if (SRF_IS_FIRSTCALL())
{ {
MemoryContext oldContext = NULL; MemoryContext oldContext = NULL;
TupleDesc tupleDescriptor = NULL; TupleDesc tupleDescriptor = NULL;
uint32 liveNodeCount = 0; uint32 liveNodeCount = 0;
bool hasOid = false; bool hasOid = false;
@ -396,7 +396,7 @@ master_get_round_robin_candidate_nodes(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL()) if (SRF_IS_FIRSTCALL())
{ {
MemoryContext oldContext = NULL; MemoryContext oldContext = NULL;
TupleDesc tupleDescriptor = NULL; TupleDesc tupleDescriptor = NULL;
List *workerNodeList = NIL; List *workerNodeList = NIL;
TypeFuncClass resultTypeClass = 0; TypeFuncClass resultTypeClass = 0;
@ -477,7 +477,7 @@ master_get_active_worker_nodes(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL()) if (SRF_IS_FIRSTCALL())
{ {
MemoryContext oldContext = NULL; MemoryContext oldContext = NULL;
List *workerNodeList = NIL; List *workerNodeList = NIL;
uint32 workerNodeCount = 0; uint32 workerNodeCount = 0;
TupleDesc tupleDescriptor = NULL; TupleDesc tupleDescriptor = NULL;
@ -567,7 +567,7 @@ GetTableDDLEvents(Oid relationId)
Relation pgIndex = NULL; Relation pgIndex = NULL;
SysScanDesc scanDescriptor = NULL; SysScanDesc scanDescriptor = NULL;
ScanKeyData scanKey[1]; ScanKeyData scanKey[1];
int scanKeyCount = 1; int scanKeyCount = 1;
HeapTuple heapTuple = NULL; HeapTuple heapTuple = NULL;
@ -599,13 +599,13 @@ GetTableDDLEvents(Oid relationId)
/* fetch table schema and column option definitions */ /* fetch table schema and column option definitions */
tableSchemaDef = pg_get_tableschemadef_string(relationId); tableSchemaDef = pg_get_tableschemadef_string(relationId);
tableColumnOptionsDef = pg_get_tablecolumnoptionsdef_string(relationId); tableColumnOptionsDef = pg_get_tablecolumnoptionsdef_string(relationId);
tableDDLEventList = lappend(tableDDLEventList, tableSchemaDef); tableDDLEventList = lappend(tableDDLEventList, tableSchemaDef);
if (tableColumnOptionsDef != NULL) if (tableColumnOptionsDef != NULL)
{ {
tableDDLEventList = lappend(tableDDLEventList, tableColumnOptionsDef); tableDDLEventList = lappend(tableDDLEventList, tableColumnOptionsDef);
} }
/* open system catalog and scan all indexes that belong to this table */ /* open system catalog and scan all indexes that belong to this table */
pgIndex = heap_open(IndexRelationId, AccessShareLock); pgIndex = heap_open(IndexRelationId, AccessShareLock);
@ -660,7 +660,7 @@ GetTableDDLEvents(Oid relationId)
{ {
statementDef = pg_get_indexdef_string(indexId); statementDef = pg_get_indexdef_string(indexId);
} }
/* append found constraint or index definition to the list */ /* append found constraint or index definition to the list */
tableDDLEventList = lappend(tableDDLEventList, statementDef); tableDDLEventList = lappend(tableDDLEventList, statementDef);
@ -695,8 +695,8 @@ hostname_client_addr(void)
Port *port = MyProcPort; Port *port = MyProcPort;
char *remoteHost = NULL; char *remoteHost = NULL;
int remoteHostLen = NI_MAXHOST; int remoteHostLen = NI_MAXHOST;
int flags = NI_NAMEREQD; /* require fully qualified hostname */ int flags = NI_NAMEREQD; /* require fully qualified hostname */
int nameFound = 0; int nameFound = 0;
if (port == NULL) if (port == NULL)
{ {
@ -709,10 +709,15 @@ hostname_client_addr(void)
#ifdef HAVE_IPV6 #ifdef HAVE_IPV6
case AF_INET6: case AF_INET6:
#endif #endif
break; {
break;
}
default: default:
{
ereport(ERROR, (errmsg("invalid address family in connection"))); ereport(ERROR, (errmsg("invalid address family in connection")));
break; break;
}
} }
remoteHost = palloc0(remoteHostLen); remoteHost = palloc0(remoteHostLen);

View File

@ -45,7 +45,8 @@ static bool WorkerCreateShard(char *nodeName, uint32 nodePort,
static bool WorkerShardStats(char *nodeName, uint32 nodePort, Oid relationId, static bool WorkerShardStats(char *nodeName, uint32 nodePort, Oid relationId,
char *shardName, uint64 *shardLength, char *shardName, uint64 *shardLength,
text **shardMinValue, text **shardMaxValue); text **shardMinValue, text **shardMaxValue);
static uint64 WorkerTableSize(char *nodeName, uint32 nodePort, char *tableName); static uint64 WorkerTableSize(char *nodeName, uint32 nodePort, Oid relationId,
char *tableName);
static StringInfo WorkerPartitionValue(char *nodeName, uint32 nodePort, Oid relationId, static StringInfo WorkerPartitionValue(char *nodeName, uint32 nodePort, Oid relationId,
char *shardName, char *selectQuery); char *shardName, char *selectQuery);
@ -77,23 +78,22 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
List *candidateNodeList = NIL; List *candidateNodeList = NIL;
text *nullMinValue = NULL; text *nullMinValue = NULL;
text *nullMaxValue = NULL; text *nullMaxValue = NULL;
char tableType = 0;
char partitionMethod = 0; char partitionMethod = 0;
char storageType = SHARD_STORAGE_TABLE;
Oid relationId = ResolveRelationId(relationNameText); Oid relationId = ResolveRelationId(relationNameText);
CheckDistributedTable(relationId); CheckDistributedTable(relationId);
tableType = get_rel_relkind(relationId); if (CStoreTable(relationId))
if (tableType != RELKIND_RELATION)
{ {
ereport(ERROR, (errmsg("relation \"%s\" is not a regular table", relationName))); storageType = SHARD_STORAGE_COLUMNAR;
} }
partitionMethod = PartitionMethod(relationId); partitionMethod = PartitionMethod(relationId);
if (partitionMethod == DISTRIBUTE_BY_HASH) if (partitionMethod == DISTRIBUTE_BY_HASH)
{ {
ereport(ERROR, (errmsg("relation \"%s\" is a hash partitioned table", ereport(ERROR, (errmsg("relation \"%s\" is a hash partitioned table",
relationName), relationName),
errdetail("We currently don't support creating shards " errdetail("We currently don't support creating shards "
"on hash-partitioned tables"))); "on hash-partitioned tables")));
} }
@ -128,9 +128,9 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
} }
CreateShardPlacements(shardId, ddlEventList, candidateNodeList, 0, CreateShardPlacements(shardId, ddlEventList, candidateNodeList, 0,
ShardReplicationFactor); ShardReplicationFactor);
InsertShardRow(relationId, shardId, SHARD_STORAGE_TABLE, nullMinValue, nullMaxValue); InsertShardRow(relationId, shardId, storageType, nullMinValue, nullMaxValue);
PG_RETURN_INT64(shardId); PG_RETURN_INT64(shardId);
} }
@ -171,9 +171,10 @@ master_append_table_to_shard(PG_FUNCTION_ARGS)
ShardInterval *shardInterval = LoadShardInterval(shardId); ShardInterval *shardInterval = LoadShardInterval(shardId);
Oid relationId = shardInterval->relationId; Oid relationId = shardInterval->relationId;
bool cstoreTable = CStoreTable(relationId);
char storageType = shardInterval->storageType; char storageType = shardInterval->storageType;
if (storageType != SHARD_STORAGE_TABLE) if (storageType != SHARD_STORAGE_TABLE && !cstoreTable)
{ {
ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId), ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId),
errdetail("The underlying shard is not a regular table"))); errdetail("The underlying shard is not a regular table")));
@ -361,7 +362,7 @@ CheckDistributedTable(Oid relationId)
*/ */
void void
CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList, CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
int workerStartIndex, int replicationFactor) int workerStartIndex, int replicationFactor)
{ {
int attemptCount = replicationFactor; int attemptCount = replicationFactor;
int workerNodeCount = list_length(workerNodeList); int workerNodeCount = list_length(workerNodeList);
@ -393,7 +394,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
else else
{ {
ereport(WARNING, (errmsg("could not create shard on \"%s:%u\"", ereport(WARNING, (errmsg("could not create shard on \"%s:%u\"",
nodeName, nodePort))); nodeName, nodePort)));
} }
if (placementsCreated >= replicationFactor) if (placementsCreated >= replicationFactor)
@ -406,7 +407,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
if (placementsCreated < replicationFactor) if (placementsCreated < replicationFactor)
{ {
ereport(ERROR, (errmsg("could only create %u of %u of required shard replicas", ereport(ERROR, (errmsg("could only create %u of %u of required shard replicas",
placementsCreated, replicationFactor))); placementsCreated, replicationFactor)));
} }
} }
@ -457,7 +458,7 @@ WorkerShardStats(char *nodeName, uint32 nodePort, Oid relationId, char *shardNam
PG_TRY(); PG_TRY();
{ {
uint64 tableSize = WorkerTableSize(nodeName, nodePort, shardName); uint64 tableSize = WorkerTableSize(nodeName, nodePort, relationId, shardName);
StringInfo minValue = WorkerPartitionValue(nodeName, nodePort, relationId, StringInfo minValue = WorkerPartitionValue(nodeName, nodePort, relationId,
shardName, SHARD_MIN_VALUE_QUERY); shardName, SHARD_MIN_VALUE_QUERY);
StringInfo maxValue = WorkerPartitionValue(nodeName, nodePort, relationId, StringInfo maxValue = WorkerPartitionValue(nodeName, nodePort, relationId,
@ -479,18 +480,27 @@ WorkerShardStats(char *nodeName, uint32 nodePort, Oid relationId, char *shardNam
/* /*
* WorkerTableSize queries the worker node to extract the disk space used by the * WorkerTableSize queries the worker node to extract the disk space used by the
* given relation. The function assumes the relation represents a regular table. * given relation. The function assumes the relation represents a regular table or
* a cstore_fdw table.
*/ */
static uint64 static uint64
WorkerTableSize(char *nodeName, uint32 nodePort, char *tableName) WorkerTableSize(char *nodeName, uint32 nodePort, Oid relationId, char *tableName)
{ {
uint64 tableSize = 0; uint64 tableSize = 0;
List *queryResultList = NIL; List *queryResultList = NIL;
StringInfo tableSizeString = NULL; StringInfo tableSizeString = NULL;
char *tableSizeStringEnd = NULL; char *tableSizeStringEnd = NULL;
bool cstoreTable = CStoreTable(relationId);
StringInfo tableSizeQuery = makeStringInfo(); StringInfo tableSizeQuery = makeStringInfo();
appendStringInfo(tableSizeQuery, SHARD_TABLE_SIZE_QUERY, tableName);
if (cstoreTable)
{
appendStringInfo(tableSizeQuery, SHARD_CSTORE_TABLE_SIZE_QUERY, tableName);
}
else
{
appendStringInfo(tableSizeQuery, SHARD_TABLE_SIZE_QUERY, tableName);
}
queryResultList = ExecuteRemoteQuery(nodeName, nodePort, tableSizeQuery); queryResultList = ExecuteRemoteQuery(nodeName, nodePort, tableSizeQuery);
if (queryResultList == NIL) if (queryResultList == NIL)

View File

@ -121,16 +121,6 @@ ErrorIfQueryNotSupported(Query *queryTree)
Assert(commandType == CMD_INSERT || commandType == CMD_UPDATE || Assert(commandType == CMD_INSERT || commandType == CMD_UPDATE ||
commandType == CMD_DELETE); commandType == CMD_DELETE);
if (!(partitionMethod == DISTRIBUTE_BY_HASH ||
partitionMethod == DISTRIBUTE_BY_RANGE))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot perform distributed planning for the given"
" modification"),
errdetail("Only hash- or range-partitioned tables may be the "
"target of distributed modifications")));
}
/* /*
* Reject subqueries which are in SELECT or WHERE clause. * Reject subqueries which are in SELECT or WHERE clause.
* Queries which include subqueries in FROM clauses are rejected below. * Queries which include subqueries in FROM clauses are rejected below.
@ -403,6 +393,7 @@ DistributedModifyTask(Query *query)
query->onConflict = RebuildOnConflict(relationId, query->onConflict); query->onConflict = RebuildOnConflict(relationId, query->onConflict);
} }
#else #else
/* always set to false for PG_VERSION_NUM < 90500 */ /* always set to false for PG_VERSION_NUM < 90500 */
upsertQuery = false; upsertQuery = false;
#endif #endif
@ -424,6 +415,7 @@ DistributedModifyTask(Query *query)
#if (PG_VERSION_NUM >= 90500) #if (PG_VERSION_NUM >= 90500)
/* /*
* RebuildOnConflict rebuilds OnConflictExpr for correct deparsing. The function * RebuildOnConflict rebuilds OnConflictExpr for correct deparsing. The function
* makes WHERE clause elements explicit and filters dropped columns * makes WHERE clause elements explicit and filters dropped columns
@ -443,7 +435,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)
/* Convert onConflictWhere qualifiers to an explicitly and'd clause */ /* Convert onConflictWhere qualifiers to an explicitly and'd clause */
updatedOnConflict->onConflictWhere = updatedOnConflict->onConflictWhere =
(Node *) make_ands_explicit((List *) onConflictWhere); (Node *) make_ands_explicit((List *) onConflictWhere);
/* /*
* Here we handle dropped columns on the distributed table. onConflictSet * Here we handle dropped columns on the distributed table. onConflictSet
@ -458,7 +450,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)
foreach(targetEntryCell, onConflictSet) foreach(targetEntryCell, onConflictSet)
{ {
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno -1]; FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno - 1];
/* skip dropped columns */ /* skip dropped columns */
if (tableAttribute->attisdropped) if (tableAttribute->attisdropped)
@ -478,6 +470,8 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)
return updatedOnConflict; return updatedOnConflict;
} }
#endif #endif

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_explain.c * multi_explain.c
* CitusDB explain support. * Citus explain support.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -45,7 +45,7 @@ MultiExplainOneQuery(Query *query, IntoClause *into, ExplainState *es,
if (localQuery) if (localQuery)
{ {
PlannedStmt *plan = NULL; PlannedStmt *plan = NULL;
instr_time planstart; instr_time planstart;
instr_time planduration; instr_time planduration;
INSTR_TIME_SET_CURRENT(planstart); INSTR_TIME_SET_CURRENT(planstart);

View File

@ -33,18 +33,18 @@
/* Config variables managed via guc.c */ /* Config variables managed via guc.c */
int LargeTableShardCount = 4; /* shard counts for a large table */ int LargeTableShardCount = 4; /* shard counts for a large table */
bool LogMultiJoinOrder = false; /* print join order as a debugging aid */ bool LogMultiJoinOrder = false; /* print join order as a debugging aid */
/* Function pointer type definition for join rule evaluation functions */ /* Function pointer type definition for join rule evaluation functions */
typedef JoinOrderNode * (*RuleEvalFunction) (JoinOrderNode *currentJoinNode, typedef JoinOrderNode *(*RuleEvalFunction) (JoinOrderNode *currentJoinNode,
TableEntry *candidateTable, TableEntry *candidateTable,
List *candidateShardList, List *candidateShardList,
List *applicableJoinClauses, List *applicableJoinClauses,
JoinType joinType); JoinType joinType);
static char * RuleNameArray[JOIN_RULE_LAST] = {0}; /* ordered join rule names */ static char *RuleNameArray[JOIN_RULE_LAST] = { 0 }; /* ordered join rule names */
static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */ static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */
/* Local functions forward declarations */ /* Local functions forward declarations */
@ -54,7 +54,8 @@ static bool JoinExprListWalker(Node *node, List **joinList);
static bool ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex); static bool ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex);
static List * MergeShardIntervals(List *leftShardIntervalList, static List * MergeShardIntervals(List *leftShardIntervalList,
List *rightShardIntervalList, JoinType joinType); List *rightShardIntervalList, JoinType joinType);
static bool ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList); static bool ShardIntervalsMatch(List *leftShardIntervalList,
List *rightShardIntervalList);
static List * LoadSortedShardIntervalList(Oid relationId); static List * LoadSortedShardIntervalList(Oid relationId);
static List * JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, static List * JoinOrderForTable(TableEntry *firstTable, List *tableEntryList,
List *joinClauseList); List *joinClauseList);
@ -68,31 +69,41 @@ static List * TableEntryListDifference(List *lhsTableList, List *rhsTableList);
static TableEntry * FindTableEntry(List *tableEntryList, uint32 tableId); static TableEntry * FindTableEntry(List *tableEntryList, uint32 tableId);
/* Local functions forward declarations for join evaluations */ /* Local functions forward declarations for join evaluations */
static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, static JoinOrderNode * EvaluateJoinRules(List *joinedTableList,
TableEntry *candidateTable, List *candidateShardList, JoinOrderNode *currentJoinNode,
TableEntry *candidateTable,
List *candidateShardList,
List *joinClauseList, JoinType joinType); List *joinClauseList, JoinType joinType);
static List * RangeTableIdList(List *tableList); static List * RangeTableIdList(List *tableList);
static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType); static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType);
static char * JoinRuleName(JoinRuleType ruleType); static char * JoinRuleName(JoinRuleType ruleType);
static JoinOrderNode * BroadcastJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, static JoinOrderNode * BroadcastJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
List *candidateShardList, List *applicableJoinClauses, List *candidateShardList,
List *applicableJoinClauses,
JoinType joinType); JoinType joinType);
static JoinOrderNode * LocalJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, static JoinOrderNode * LocalJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
List *candidateShardList, List *applicableJoinClauses, List *candidateShardList, List *applicableJoinClauses,
JoinType joinType); JoinType joinType);
static bool JoinOnColumns(Var *currentPartitioncolumn, Var *candidatePartitionColumn, static bool JoinOnColumns(Var *currentPartitioncolumn, Var *candidatePartitionColumn,
List *joinClauseList); List *joinClauseList);
static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode,
List *candidateShardList, List *applicableJoinClauses, TableEntry *candidateTable,
List *candidateShardList,
List *applicableJoinClauses,
JoinType joinType); JoinType joinType);
static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode,
List *candidateShardList, List *applicableJoinClauses, TableEntry *candidateTable,
List *candidateShardList,
List *applicableJoinClauses,
JoinType joinType); JoinType joinType);
static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode, TableEntry *candidateTable, static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode,
List *candidateShardList, List *applicableJoinClauses, TableEntry *candidateTable,
List *candidateShardList,
List *applicableJoinClauses,
JoinType joinType); JoinType joinType);
static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType, static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType
Var *partitionColumn, char partitionMethod); joinRuleType, Var *partitionColumn,
char partitionMethod);
/* /*
@ -106,7 +117,7 @@ List *
FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList) FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList)
{ {
List *joinList = NIL; List *joinList = NIL;
ListCell * joinCell = NULL; ListCell *joinCell = NULL;
List *joinWhereClauseList = NIL; List *joinWhereClauseList = NIL;
List *joinOrderList = NIL; List *joinOrderList = NIL;
List *joinedTableList = NIL; List *joinedTableList = NIL;
@ -175,7 +186,7 @@ FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList)
"query"), "query"),
errdetail("Cannot perform outer joins with broadcast " errdetail("Cannot perform outer joins with broadcast "
"joins of more than 1 shard"), "joins of more than 1 shard"),
errhint("Set citusdb.large_table_shard_count to 1"))); errhint("Set citus.large_table_shard_count to 1")));
} }
} }
else if (nextJoinNode->joinRuleType == LOCAL_PARTITION_JOIN) else if (nextJoinNode->joinRuleType == LOCAL_PARTITION_JOIN)
@ -199,7 +210,6 @@ FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList)
"query"), "query"),
errdetail("Shards of relations in outer join queries " errdetail("Shards of relations in outer join queries "
"must have 1-to-1 shard partitioning"))); "must have 1-to-1 shard partitioning")));
} }
} }
else else
@ -439,7 +449,7 @@ MergeShardIntervals(List *leftShardIntervalList, List *rightShardIntervalList,
bool nextMaxSmaller = comparisonResult > 0; bool nextMaxSmaller = comparisonResult > 0;
if ((shardUnion && nextMaxLarger) || if ((shardUnion && nextMaxLarger) ||
(!shardUnion && nextMaxSmaller) ) (!shardUnion && nextMaxSmaller))
{ {
newShardInterval->maxValue = datumCopy(nextMax, typeByValue, typeLen); newShardInterval->maxValue = datumCopy(nextMax, typeByValue, typeLen);
} }
@ -586,7 +596,8 @@ ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList)
nextRightIntervalCell = lnext(rightShardIntervalCell); nextRightIntervalCell = lnext(rightShardIntervalCell);
if (nextRightIntervalCell != NULL) if (nextRightIntervalCell != NULL)
{ {
ShardInterval *nextRightInterval = (ShardInterval *) lfirst(nextRightIntervalCell); ShardInterval *nextRightInterval =
(ShardInterval *) lfirst(nextRightIntervalCell);
shardIntervalsIntersect = ShardIntervalsOverlap(leftInterval, shardIntervalsIntersect = ShardIntervalsOverlap(leftInterval,
nextRightInterval); nextRightInterval);
if (shardIntervalsIntersect) if (shardIntervalsIntersect)
@ -730,7 +741,7 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause
* BestJoinOrder takes in a list of candidate join orders, and determines the * BestJoinOrder takes in a list of candidate join orders, and determines the
* best join order among these candidates. The function uses two heuristics for * best join order among these candidates. The function uses two heuristics for
* this. First, the function chooses join orders that have the fewest number of * this. First, the function chooses join orders that have the fewest number of
* join operators that cause large data transfers. Second, the function chooses * join operators that cause large data transfers. Second, the function chooses
* join orders where large data transfers occur later in the execution. * join orders where large data transfers occur later in the execution.
*/ */
static List * static List *
@ -1009,7 +1020,7 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
uint32 candidateTableId = 0; uint32 candidateTableId = 0;
List *joinedTableIdList = NIL; List *joinedTableIdList = NIL;
List *applicableJoinClauses = NIL; List *applicableJoinClauses = NIL;
uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1; uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1;
uint32 highestValidIndex = JOIN_RULE_LAST - 1; uint32 highestValidIndex = JOIN_RULE_LAST - 1;
uint32 ruleIndex = 0; uint32 ruleIndex = 0;
@ -1028,11 +1039,11 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
JoinRuleType ruleType = (JoinRuleType) ruleIndex; JoinRuleType ruleType = (JoinRuleType) ruleIndex;
RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType); RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType);
nextJoinNode = (*ruleEvalFunction) (currentJoinNode, nextJoinNode = (*ruleEvalFunction)(currentJoinNode,
candidateTable, candidateTable,
candidateShardList, candidateShardList,
applicableJoinClauses, applicableJoinClauses,
joinType); joinType);
/* break after finding the first join rule that applies */ /* break after finding the first join rule that applies */
if (nextJoinNode != NULL) if (nextJoinNode != NULL)

View File

@ -91,7 +91,8 @@ static void ParentSetNewChild(MultiNode *parentNode, MultiNode *oldChildNode,
/* Local functions forward declarations for aggregate expressions */ /* Local functions forward declarations for aggregate expressions */
static void ApplyExtendedOpNodes(MultiExtendedOp *originalNode, static void ApplyExtendedOpNodes(MultiExtendedOp *originalNode,
MultiExtendedOp *masterNode, MultiExtendedOp *workerNode); MultiExtendedOp *masterNode,
MultiExtendedOp *workerNode);
static void TransformSubqueryNode(MultiTable *subqueryNode); static void TransformSubqueryNode(MultiTable *subqueryNode);
static MultiExtendedOp * MasterExtendedOpNode(MultiExtendedOp *originalOpNode); static MultiExtendedOp * MasterExtendedOpNode(MultiExtendedOp *originalOpNode);
static Node * MasterAggregateMutator(Node *originalNode, AttrNumber *columnId); static Node * MasterAggregateMutator(Node *originalNode, AttrNumber *columnId);
@ -117,7 +118,8 @@ static void ErrorIfUnsupportedArrayAggregate(Aggref *arrayAggregateExpression);
static void ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression, static void ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
MultiNode *logicalPlanNode); MultiNode *logicalPlanNode);
static Var * AggregateDistinctColumn(Aggref *aggregateExpression); static Var * AggregateDistinctColumn(Aggref *aggregateExpression);
static bool TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode, static bool TablePartitioningSupportsDistinct(List *tableNodeList,
MultiExtendedOp *opNode,
Var *distinctColumn); Var *distinctColumn);
static bool GroupedByColumn(List *groupClauseList, List *targetList, Var *column); static bool GroupedByColumn(List *groupClauseList, List *targetList, Var *column);
@ -257,6 +259,7 @@ MultiLogicalPlanOptimize(MultiTreeRoot *multiLogicalPlan)
MultiTable *tableNode = (MultiTable *) lfirst(tableNodeCell); MultiTable *tableNode = (MultiTable *) lfirst(tableNodeCell);
if (tableNode->relationId == SUBQUERY_RELATION_ID) if (tableNode->relationId == SUBQUERY_RELATION_ID)
{ {
ErrorIfContainsUnsupportedAggregate((MultiNode *) tableNode);
TransformSubqueryNode(tableNode); TransformSubqueryNode(tableNode);
} }
} }
@ -488,7 +491,7 @@ AddressProjectSpecialConditions(MultiProject *projectNode)
/* /*
* We check if we need to include any child columns in the project node to * We check if we need to include any child columns in the project node to
* address the following special conditions. * address the following special conditions.
* *
* SNC1: project node must include child node's projected columns, or * SNC1: project node must include child node's projected columns, or
* SNC2: project node must include child node's partition column, or * SNC2: project node must include child node's partition column, or
@ -637,7 +640,7 @@ Commutative(MultiUnaryNode *parentNode, MultiUnaryNode *childNode)
{ {
PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID; PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID;
CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
CitusNodeTag childNodeTag = CitusNodeTag(childNode); CitusNodeTag childNodeTag = CitusNodeTag(childNode);
/* we cannot be commutative with non-query operators */ /* we cannot be commutative with non-query operators */
if (childNodeTag == T_MultiTreeRoot || childNodeTag == T_MultiTable) if (childNodeTag == T_MultiTreeRoot || childNodeTag == T_MultiTable)
@ -692,7 +695,7 @@ Distributive(MultiUnaryNode *parentNode, MultiBinaryNode *childNode)
{ {
PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID; PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID;
CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
CitusNodeTag childNodeTag = CitusNodeTag(childNode); CitusNodeTag childNodeTag = CitusNodeTag(childNode);
/* special condition checks for partition operator are not implemented */ /* special condition checks for partition operator are not implemented */
Assert(parentNodeTag != T_MultiPartition); Assert(parentNodeTag != T_MultiPartition);
@ -751,7 +754,7 @@ Factorizable(MultiBinaryNode *parentNode, MultiUnaryNode *childNode)
{ {
PullUpStatus pullUpStatus = PULL_UP_NOT_VALID; PullUpStatus pullUpStatus = PULL_UP_NOT_VALID;
CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
CitusNodeTag childNodeTag = CitusNodeTag(childNode); CitusNodeTag childNodeTag = CitusNodeTag(childNode);
/* /*
* The following nodes are factorizable with their parents, but we don't * The following nodes are factorizable with their parents, but we don't
@ -1220,7 +1223,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode)
bool hasAggregates = contain_agg_clause((Node *) originalExpression); bool hasAggregates = contain_agg_clause((Node *) originalExpression);
if (hasAggregates) if (hasAggregates)
{ {
Node *newNode = MasterAggregateMutator((Node*) originalExpression, Node *newNode = MasterAggregateMutator((Node *) originalExpression,
&columnId); &columnId);
newExpression = (Expr *) newNode; newExpression = (Expr *) newNode;
} }
@ -1826,7 +1829,7 @@ WorkerAggregateExpressionList(Aggref *originalAggregate)
static AggregateType static AggregateType
GetAggregateType(Oid aggFunctionId) GetAggregateType(Oid aggFunctionId)
{ {
char *aggregateProcName = NULL; char *aggregateProcName = NULL;
uint32 aggregateCount = 0; uint32 aggregateCount = 0;
uint32 aggregateIndex = 0; uint32 aggregateIndex = 0;
bool found = false; bool found = false;
@ -1980,22 +1983,30 @@ CountDistinctHashFunctionName(Oid argumentType)
switch (argumentType) switch (argumentType)
{ {
case INT4OID: case INT4OID:
{
hashFunctionName = pstrdup(HLL_HASH_INTEGER_FUNC_NAME); hashFunctionName = pstrdup(HLL_HASH_INTEGER_FUNC_NAME);
break; break;
}
case INT8OID: case INT8OID:
{
hashFunctionName = pstrdup(HLL_HASH_BIGINT_FUNC_NAME); hashFunctionName = pstrdup(HLL_HASH_BIGINT_FUNC_NAME);
break; break;
}
case TEXTOID: case TEXTOID:
case BPCHAROID: case BPCHAROID:
case VARCHAROID: case VARCHAROID:
{
hashFunctionName = pstrdup(HLL_HASH_TEXT_FUNC_NAME); hashFunctionName = pstrdup(HLL_HASH_TEXT_FUNC_NAME);
break; break;
}
default: default:
{
hashFunctionName = pstrdup(HLL_HASH_ANY_FUNC_NAME); hashFunctionName = pstrdup(HLL_HASH_ANY_FUNC_NAME);
break; break;
}
} }
return hashFunctionName; return hashFunctionName;
@ -2135,8 +2146,9 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
bool distinctSupported = true; bool distinctSupported = true;
List *repartitionNodeList = NIL; List *repartitionNodeList = NIL;
Var *distinctColumn = NULL; Var *distinctColumn = NULL;
List *multiTableNodeList = NIL;
AggregateType aggregateType = GetAggregateType(aggregateExpression->aggfnoid); ListCell *multiTableNodeCell = NULL;
AggregateType aggregateType = AGGREGATE_INVALID_FIRST;
/* check if logical plan includes a subquery */ /* check if logical plan includes a subquery */
List *subqueryMultiTableList = SubqueryMultiTableList(logicalPlanNode); List *subqueryMultiTableList = SubqueryMultiTableList(logicalPlanNode);
@ -2147,7 +2159,20 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
errdetail("distinct in the outermost query is unsupported"))); errdetail("distinct in the outermost query is unsupported")));
} }
multiTableNodeList = FindNodesOfType(logicalPlanNode, T_MultiTable);
foreach(multiTableNodeCell, multiTableNodeList)
{
MultiTable *multiTable = (MultiTable *) lfirst(multiTableNodeCell);
if (multiTable->relationId == SUBQUERY_RELATION_ID)
{
ereport(ERROR, (errmsg("cannot compute count (distinct)"),
errdetail("Subqueries with aggregate (distinct) are "
"not supported yet")));
}
}
/* if we have a count(distinct), and distinct approximation is enabled */ /* if we have a count(distinct), and distinct approximation is enabled */
aggregateType = GetAggregateType(aggregateExpression->aggfnoid);
if (aggregateType == AGGREGATE_COUNT && if (aggregateType == AGGREGATE_COUNT &&
CountDistinctErrorRate != DISABLE_DISTINCT_APPROXIMATION) CountDistinctErrorRate != DISABLE_DISTINCT_APPROXIMATION)
{ {
@ -2479,7 +2504,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
if (subqueryTree->setOperations) if (subqueryTree->setOperations)
{ {
SetOperationStmt *setOperationStatement = SetOperationStmt *setOperationStatement =
(SetOperationStmt *) subqueryTree->setOperations; (SetOperationStmt *) subqueryTree->setOperations;
if (setOperationStatement->op == SETOP_UNION) if (setOperationStatement->op == SETOP_UNION)
{ {
@ -2563,7 +2588,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
List *joinTreeTableIndexList = NIL; List *joinTreeTableIndexList = NIL;
uint32 joiningTableCount = 0; uint32 joiningTableCount = 0;
ExtractRangeTableIndexWalker((Node*) subqueryTree->jointree, ExtractRangeTableIndexWalker((Node *) subqueryTree->jointree,
&joinTreeTableIndexList); &joinTreeTableIndexList);
joiningTableCount = list_length(joinTreeTableIndexList); joiningTableCount = list_length(joinTreeTableIndexList);
@ -2587,7 +2612,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList, List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList,
targetEntryList); targetEntryList);
bool distinctOnPartitionColumn = bool distinctOnPartitionColumn =
TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList); TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList);
if (!distinctOnPartitionColumn) if (!distinctOnPartitionColumn)
{ {
preconditionsSatisfied = false; preconditionsSatisfied = false;
@ -2609,7 +2634,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
foreach(rangeTableEntryCell, subqueryEntryList) foreach(rangeTableEntryCell, subqueryEntryList)
{ {
RangeTblEntry *rangeTableEntry = RangeTblEntry *rangeTableEntry =
(RangeTblEntry *) lfirst(rangeTableEntryCell); (RangeTblEntry *) lfirst(rangeTableEntryCell);
Query *innerSubquery = rangeTableEntry->subquery; Query *innerSubquery = rangeTableEntry->subquery;
ErrorIfCannotPushdownSubquery(innerSubquery, outerQueryHasLimit); ErrorIfCannotPushdownSubquery(innerSubquery, outerQueryHasLimit);
@ -2639,7 +2664,7 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
* Extract all range table indexes from the join tree. Note that sub-queries * Extract all range table indexes from the join tree. Note that sub-queries
* that get pulled up by PostgreSQL don't appear in this join tree. * that get pulled up by PostgreSQL don't appear in this join tree.
*/ */
ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
foreach(joinTreeTableIndexCell, joinTreeTableIndexList) foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
{ {
/* /*
@ -2768,7 +2793,7 @@ ErrorIfUnsupportedUnionQuery(Query *unionQuery)
leftQueryOnPartitionColumn = TargetListOnPartitionColumn(leftQuery, leftQueryOnPartitionColumn = TargetListOnPartitionColumn(leftQuery,
leftQuery->targetList); leftQuery->targetList);
rightQueryOnPartitionColumn = TargetListOnPartitionColumn(rightQuery, rightQueryOnPartitionColumn = TargetListOnPartitionColumn(rightQuery,
rightQuery->targetList); rightQuery->targetList);
if (!(leftQueryOnPartitionColumn && rightQueryOnPartitionColumn)) if (!(leftQueryOnPartitionColumn && rightQueryOnPartitionColumn))
{ {
@ -2807,7 +2832,7 @@ GroupTargetEntryList(List *groupClauseList, List *targetEntryList)
{ {
SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupClauseCell); SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupClauseCell);
TargetEntry *groupTargetEntry = TargetEntry *groupTargetEntry =
get_sortgroupclause_tle(groupClause, targetEntryList); get_sortgroupclause_tle(groupClause, targetEntryList);
groupTargetEntryList = lappend(groupTargetEntryList, groupTargetEntry); groupTargetEntryList = lappend(groupTargetEntryList, groupTargetEntry);
} }
@ -2890,7 +2915,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query)
else if (IsA(columnExpression, FieldSelect)) else if (IsA(columnExpression, FieldSelect))
{ {
FieldSelect *compositeField = (FieldSelect *) columnExpression; FieldSelect *compositeField = (FieldSelect *) columnExpression;
Expr *fieldExpression = compositeField->arg; Expr *fieldExpression = compositeField->arg;
if (IsA(fieldExpression, Var)) if (IsA(fieldExpression, Var))
{ {
@ -2909,7 +2934,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query)
return false; return false;
} }
rangeTableEntryIndex = candidateColumn->varno - 1; rangeTableEntryIndex = candidateColumn->varno - 1;
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex); rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
if (rangeTableEntry->rtekind == RTE_RELATION) if (rangeTableEntry->rtekind == RTE_RELATION)
@ -2980,7 +3005,7 @@ CompositeFieldRecursive(Expr *expression, Query *query)
return NULL; return NULL;
} }
rangeTableEntryIndex = candidateColumn->varno - 1; rangeTableEntryIndex = candidateColumn->varno - 1;
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex); rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
if (rangeTableEntry->rtekind == RTE_SUBQUERY) if (rangeTableEntry->rtekind == RTE_SUBQUERY)
@ -3019,7 +3044,7 @@ FullCompositeFieldList(List *compositeFieldList)
uint32 fieldIndex = 0; uint32 fieldIndex = 0;
ListCell *fieldSelectCell = NULL; ListCell *fieldSelectCell = NULL;
foreach (fieldSelectCell, compositeFieldList) foreach(fieldSelectCell, compositeFieldList)
{ {
FieldSelect *fieldSelect = (FieldSelect *) lfirst(fieldSelectCell); FieldSelect *fieldSelect = (FieldSelect *) lfirst(fieldSelectCell);
uint32 compositeFieldIndex = 0; uint32 compositeFieldIndex = 0;
@ -3226,9 +3251,10 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery)
if (outerColumnIsPartitionColumn && localColumnIsPartitionColumn) if (outerColumnIsPartitionColumn && localColumnIsPartitionColumn)
{ {
FieldSelect *outerCompositeField = FieldSelect *outerCompositeField =
CompositeFieldRecursive(outerQueryExpression, parentQuery); CompositeFieldRecursive(outerQueryExpression, parentQuery);
FieldSelect *localCompositeField = FieldSelect *localCompositeField =
CompositeFieldRecursive(localQueryExpression, lateralQuery); CompositeFieldRecursive(localQueryExpression, lateralQuery);
/* /*
* If partition colums are composite fields, add them to list to * If partition colums are composite fields, add them to list to
* check later if all composite fields are used. * check later if all composite fields are used.
@ -3251,12 +3277,12 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery)
} }
/* check composite fields */ /* check composite fields */
if(!supportedLateralQuery) if (!supportedLateralQuery)
{ {
bool outerFullCompositeFieldList = bool outerFullCompositeFieldList =
FullCompositeFieldList(outerCompositeFieldList); FullCompositeFieldList(outerCompositeFieldList);
bool localFullCompositeFieldList = bool localFullCompositeFieldList =
FullCompositeFieldList(localCompositeFieldList); FullCompositeFieldList(localCompositeFieldList);
if (outerFullCompositeFieldList && localFullCompositeFieldList) if (outerFullCompositeFieldList && localFullCompositeFieldList)
{ {
@ -3301,15 +3327,15 @@ JoinOnPartitionColumn(Query *query)
if (isLeftColumnPartitionColumn && isRightColumnPartitionColumn) if (isLeftColumnPartitionColumn && isRightColumnPartitionColumn)
{ {
FieldSelect *leftCompositeField = FieldSelect *leftCompositeField =
CompositeFieldRecursive(leftArgument, query); CompositeFieldRecursive(leftArgument, query);
FieldSelect *rightCompositeField = FieldSelect *rightCompositeField =
CompositeFieldRecursive(rightArgument, query); CompositeFieldRecursive(rightArgument, query);
/* /*
* If partition colums are composite fields, add them to list to * If partition colums are composite fields, add them to list to
* check later if all composite fields are used. * check later if all composite fields are used.
*/ */
if(leftCompositeField && rightCompositeField) if (leftCompositeField && rightCompositeField)
{ {
leftCompositeFieldList = lappend(leftCompositeFieldList, leftCompositeFieldList = lappend(leftCompositeFieldList,
leftCompositeField); leftCompositeField);
@ -3318,7 +3344,7 @@ JoinOnPartitionColumn(Query *query)
} }
/* if both sides are not composite fields, they are normal columns */ /* if both sides are not composite fields, they are normal columns */
if(!(leftCompositeField && rightCompositeField)) if (!(leftCompositeField && rightCompositeField))
{ {
joinOnPartitionColumn = true; joinOnPartitionColumn = true;
break; break;
@ -3327,12 +3353,12 @@ JoinOnPartitionColumn(Query *query)
} }
/* check composite fields */ /* check composite fields */
if(!joinOnPartitionColumn) if (!joinOnPartitionColumn)
{ {
bool leftFullCompositeFieldList = bool leftFullCompositeFieldList =
FullCompositeFieldList(leftCompositeFieldList); FullCompositeFieldList(leftCompositeFieldList);
bool rightFullCompositeFieldList = bool rightFullCompositeFieldList =
FullCompositeFieldList(rightCompositeFieldList); FullCompositeFieldList(rightCompositeFieldList);
if (leftFullCompositeFieldList && rightFullCompositeFieldList) if (leftFullCompositeFieldList && rightFullCompositeFieldList)
{ {
@ -3409,7 +3435,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
/* check if this table has 1-1 shard partitioning with first table */ /* check if this table has 1-1 shard partitioning with first table */
coPartitionedTables = CoPartitionedTables(firstShardIntervalList, coPartitionedTables = CoPartitionedTables(firstShardIntervalList,
currentShardIntervalList); currentShardIntervalList);
if (!coPartitionedTables) if (!coPartitionedTables)
{ {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@ -3437,7 +3463,7 @@ RelationIdList(Query *query)
foreach(tableEntryCell, tableEntryList) foreach(tableEntryCell, tableEntryList)
{ {
TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell); TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell);
Oid relationId = tableEntry->relationId; Oid relationId = tableEntry->relationId;
relationIdList = list_append_unique_oid(relationIdList, relationId); relationIdList = list_append_unique_oid(relationIdList, relationId);
@ -3617,7 +3643,7 @@ ExtractQueryWalker(Node *node, List **queryList)
Query *query = (Query *) node; Query *query = (Query *) node;
(*queryList) = lappend(*queryList, query); (*queryList) = lappend(*queryList, query);
walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList, walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList,
QTW_EXAMINE_RTES); QTW_EXAMINE_RTES);
} }
@ -3641,7 +3667,7 @@ LeafQuery(Query *queryTree)
* Extract all range table indexes from the join tree. Note that sub-queries * Extract all range table indexes from the join tree. Note that sub-queries
* that get pulled up by PostgreSQL don't appear in this join tree. * that get pulled up by PostgreSQL don't appear in this join tree.
*/ */
ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
foreach(joinTreeTableIndexCell, joinTreeTableIndexList) foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
{ {
/* /*
@ -3725,7 +3751,7 @@ PartitionColumnOpExpressionList(Query *query)
} }
else if (IsA(leftArgument, Const) && IsA(leftArgument, Var)) else if (IsA(leftArgument, Const) && IsA(leftArgument, Var))
{ {
candidatePartitionColumn = (Var *) rightArgument; candidatePartitionColumn = (Var *) rightArgument;
} }
else else
{ {

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_logical_planner.c * multi_logical_planner.c
* *
* Routines for constructing a logical plan tree from the given Query tree * Routines for constructing a logical plan tree from the given Query tree
* structure. This new logical plan is based on multi-relational algebra rules. * structure. This new logical plan is based on multi-relational algebra rules.
* *
@ -39,11 +39,11 @@ bool SubqueryPushdown = false; /* is subquery pushdown enabled */
/* Function pointer type definition for apply join rule functions */ /* Function pointer type definition for apply join rule functions */
typedef MultiNode * (*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode, typedef MultiNode *(*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode,
Var *partitionColumn, JoinType joinType, Var *partitionColumn, JoinType joinType,
List *joinClauses); List *joinClauses);
static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */ static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */
/* Local functions forward declarations */ /* Local functions forward declarations */
static MultiNode * MultiPlanTree(Query *queryTree); static MultiNode * MultiPlanTree(Query *queryTree);
@ -157,7 +157,7 @@ SubqueryEntryList(Query *queryTree)
* only walk over range table entries at this level and do not recurse into * only walk over range table entries at this level and do not recurse into
* subqueries. * subqueries.
*/ */
ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
foreach(joinTreeTableIndexCell, joinTreeTableIndexList) foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
{ {
/* /*
@ -285,6 +285,7 @@ MultiPlanTree(Query *queryTree)
else else
{ {
bool hasOuterJoin = false; bool hasOuterJoin = false;
/* /*
* We calculate the join order using the list of tables in the query and * We calculate the join order using the list of tables in the query and
* the join clauses between them. Note that this function owns the table * the join clauses between them. Note that this function owns the table
@ -465,6 +466,7 @@ ErrorIfQueryNotSupported(Query *queryTree)
#if (PG_VERSION_NUM >= 90500) #if (PG_VERSION_NUM >= 90500)
/* HasTablesample returns tree if the query contains tablesample */ /* HasTablesample returns tree if the query contains tablesample */
static bool static bool
HasTablesample(Query *queryTree) HasTablesample(Query *queryTree)
@ -485,6 +487,8 @@ HasTablesample(Query *queryTree)
return hasTablesample; return hasTablesample;
} }
#endif #endif
@ -529,7 +533,8 @@ HasUnsupportedJoinWalker(Node *node, void *context)
* ErrorIfSubqueryNotSupported checks that we can perform distributed planning for * ErrorIfSubqueryNotSupported checks that we can perform distributed planning for
* the given subquery. * the given subquery.
*/ */
static void ErrorIfSubqueryNotSupported(Query *subqueryTree) static void
ErrorIfSubqueryNotSupported(Query *subqueryTree)
{ {
char *errorDetail = NULL; char *errorDetail = NULL;
bool preconditionsSatisfied = true; bool preconditionsSatisfied = true;
@ -587,7 +592,6 @@ HasOuterJoin(Query *queryTree)
static bool static bool
HasOuterJoinWalker(Node *node, void *context) HasOuterJoinWalker(Node *node, void *context)
{ {
bool hasOuterJoin = false; bool hasOuterJoin = false;
if (node == NULL) if (node == NULL)
{ {
@ -657,7 +661,7 @@ HasComplexRangeTableType(Query *queryTree)
* Extract all range table indexes from the join tree. Note that sub-queries * Extract all range table indexes from the join tree. Note that sub-queries
* that get pulled up by PostgreSQL don't appear in this join tree. * that get pulled up by PostgreSQL don't appear in this join tree.
*/ */
ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
foreach(joinTreeTableIndexCell, joinTreeTableIndexList) foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
{ {
/* /*
@ -675,7 +679,7 @@ HasComplexRangeTableType(Query *queryTree)
* subquery. * subquery.
*/ */
if (rangeTableEntry->rtekind != RTE_RELATION && if (rangeTableEntry->rtekind != RTE_RELATION &&
rangeTableEntry->rtekind != RTE_SUBQUERY) rangeTableEntry->rtekind != RTE_SUBQUERY)
{ {
hasComplexRangeTableType = true; hasComplexRangeTableType = true;
} }
@ -966,7 +970,7 @@ TableEntryList(List *rangeTableList)
foreach(rangeTableCell, rangeTableList) foreach(rangeTableCell, rangeTableList)
{ {
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
if (rangeTableEntry->rtekind == RTE_RELATION) if (rangeTableEntry->rtekind == RTE_RELATION)
{ {
@ -1178,8 +1182,8 @@ IsSelectClause(Node *clause)
/* we currently consider the following nodes as select clauses */ /* we currently consider the following nodes as select clauses */
NodeTag nodeTag = nodeTag(clause); NodeTag nodeTag = nodeTag(clause);
if ( !(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr || if (!(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr ||
nodeTag == T_NullTest || nodeTag == T_BooleanTest) ) nodeTag == T_NullTest || nodeTag == T_BooleanTest))
{ {
return false; return false;
} }
@ -1317,9 +1321,9 @@ UnaryOperator(MultiNode *node)
{ {
bool unaryOperator = false; bool unaryOperator = false;
if (CitusIsA(node, MultiTreeRoot) || CitusIsA(node, MultiTable) || if (CitusIsA(node, MultiTreeRoot) || CitusIsA(node, MultiTable) ||
CitusIsA(node, MultiCollect) || CitusIsA(node, MultiSelect) || CitusIsA(node, MultiCollect) || CitusIsA(node, MultiSelect) ||
CitusIsA(node, MultiProject) || CitusIsA(node, MultiPartition) || CitusIsA(node, MultiProject) || CitusIsA(node, MultiPartition) ||
CitusIsA(node, MultiExtendedOp)) CitusIsA(node, MultiExtendedOp))
{ {
unaryOperator = true; unaryOperator = true;
@ -1403,7 +1407,7 @@ FindNodesOfType(MultiNode *node, int type)
} }
else if (BinaryOperator(node)) else if (BinaryOperator(node))
{ {
MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode; MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode;
MultiNode *rightChildNode = ((MultiBinaryNode *) node)->rightChildNode; MultiNode *rightChildNode = ((MultiBinaryNode *) node)->rightChildNode;
List *leftChildNodeList = FindNodesOfType(leftChildNode, type); List *leftChildNodeList = FindNodesOfType(leftChildNode, type);
@ -1533,9 +1537,9 @@ ExtractRangeTableEntryWalker(Node *node, List **rangeTableList)
List * List *
pull_var_clause_default(Node *node) pull_var_clause_default(Node *node)
{ {
List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES, List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES,
PVC_REJECT_PLACEHOLDERS); PVC_REJECT_PLACEHOLDERS);
return columnList; return columnList;
} }
@ -1552,7 +1556,7 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType,
MultiNode *multiNode = NULL; MultiNode *multiNode = NULL;
List *applicableJoinClauses = NIL; List *applicableJoinClauses = NIL;
List *leftTableIdList = OutputTableIdList(leftNode); List *leftTableIdList = OutputTableIdList(leftNode);
List *rightTableIdList = OutputTableIdList(rightNode); List *rightTableIdList = OutputTableIdList(rightNode);
int rightTableIdCount = 0; int rightTableIdCount = 0;
uint32 rightTableId = 0; uint32 rightTableId = 0;
@ -1567,8 +1571,8 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType,
/* call the join rule application function to create the new join node */ /* call the join rule application function to create the new join node */
ruleApplyFunction = JoinRuleApplyFunction(ruleType); ruleApplyFunction = JoinRuleApplyFunction(ruleType);
multiNode = (*ruleApplyFunction) (leftNode, rightNode, partitionColumn, multiNode = (*ruleApplyFunction)(leftNode, rightNode, partitionColumn,
joinType, applicableJoinClauses); joinType, applicableJoinClauses);
if (joinType != JOIN_INNER && CitusIsA(multiNode, MultiJoin)) if (joinType != JOIN_INNER && CitusIsA(multiNode, MultiJoin))
{ {
@ -1918,7 +1922,7 @@ ErrorIfSubqueryJoin(Query *queryTree)
* Extract all range table indexes from the join tree. Note that sub-queries * Extract all range table indexes from the join tree. Note that sub-queries
* that get pulled up by PostgreSQL don't appear in this join tree. * that get pulled up by PostgreSQL don't appear in this join tree.
*/ */
ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
joiningRangeTableCount = list_length(joinTreeTableIndexList); joiningRangeTableCount = list_length(joinTreeTableIndexList);
if (joiningRangeTableCount > 1) if (joiningRangeTableCount > 1)

View File

@ -122,7 +122,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
AggStrategy aggregateStrategy = AGG_PLAIN; AggStrategy aggregateStrategy = AGG_PLAIN;
AggClauseCosts aggregateCosts; AggClauseCosts aggregateCosts;
AttrNumber *groupColumnIdArray = NULL; AttrNumber *groupColumnIdArray = NULL;
List *aggregateTargetList = NIL; List *aggregateTargetList = NIL;
List *groupColumnList = NIL; List *groupColumnList = NIL;
List *columnList = NIL; List *columnList = NIL;
ListCell *columnCell = NULL; ListCell *columnCell = NULL;
@ -168,13 +168,13 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
/* finally create the plan */ /* finally create the plan */
#if (PG_VERSION_NUM >= 90500) #if (PG_VERSION_NUM >= 90500)
aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy,
&aggregateCosts, groupColumnCount, groupColumnIdArray, &aggregateCosts, groupColumnCount, groupColumnIdArray,
groupColumnOpArray, NIL, rowEstimate, subPlan); groupColumnOpArray, NIL, rowEstimate, subPlan);
#else #else
aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy,
&aggregateCosts, groupColumnCount, groupColumnIdArray, &aggregateCosts, groupColumnCount, groupColumnIdArray,
groupColumnOpArray, rowEstimate, subPlan); groupColumnOpArray, rowEstimate, subPlan);
#endif #endif
return aggregatePlan; return aggregatePlan;
@ -211,7 +211,7 @@ BuildSelectStatement(Query *masterQuery, char *masterTableName,
rangeTableEntry = copyObject(queryRangeTableEntry); rangeTableEntry = copyObject(queryRangeTableEntry);
rangeTableEntry->rtekind = RTE_RELATION; rangeTableEntry->rtekind = RTE_RELATION;
rangeTableEntry->eref = makeAlias(masterTableName, NIL); rangeTableEntry->eref = makeAlias(masterTableName, NIL);
rangeTableEntry->relid = 0; /* to be filled in exec_Start */ rangeTableEntry->relid = 0; /* to be filled in exec_Start */
rangeTableEntry->inh = false; rangeTableEntry->inh = false;
rangeTableEntry->inFromCl = true; rangeTableEntry->inFromCl = true;
@ -220,7 +220,7 @@ BuildSelectStatement(Query *masterQuery, char *masterTableName,
/* (2) build and initialize sequential scan node */ /* (2) build and initialize sequential scan node */
sequentialScan = makeNode(SeqScan); sequentialScan = makeNode(SeqScan);
sequentialScan->scanrelid = 1; /* always one */ sequentialScan->scanrelid = 1; /* always one */
/* (3) add an aggregation plan if needed */ /* (3) add an aggregation plan if needed */
if (masterQuery->hasAggs || masterQuery->groupClause) if (masterQuery->hasAggs || masterQuery->groupClause)

View File

@ -138,7 +138,7 @@ static OpExpr * MakeOpExpressionWithZeroConst(void);
static List * BuildRestrictInfoList(List *qualList); static List * BuildRestrictInfoList(List *qualList);
static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery, static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery,
List *dependedJobList); List *dependedJobList);
static JoinSequenceNode * JoinSequenceArray(List * rangeTableFragmentsList, static JoinSequenceNode * JoinSequenceArray(List *rangeTableFragmentsList,
Query *jobQuery, List *dependedJobList); Query *jobQuery, List *dependedJobList);
static bool PartitionedOnColumn(Var *column, List *rangeTableList, List *dependedJobList); static bool PartitionedOnColumn(Var *column, List *rangeTableList, List *dependedJobList);
static void CheckJoinBetweenColumns(OpExpr *joinClause); static void CheckJoinBetweenColumns(OpExpr *joinClause);
@ -155,7 +155,8 @@ static StringInfo DatumArrayString(Datum *datumArray, uint32 datumCount, Oid dat
static Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType, static Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType,
char *queryString); char *queryString);
static void UpdateRangeTableAlias(List *rangeTableList, List *fragmentList); static void UpdateRangeTableAlias(List *rangeTableList, List *fragmentList);
static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment); static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry,
RangeTableFragment *fragment);
static uint64 AnchorShardId(List *fragmentList, uint32 anchorRangeTableId); static uint64 AnchorShardId(List *fragmentList, uint32 anchorRangeTableId);
static List * PruneSqlTaskDependencies(List *sqlTaskList); static List * PruneSqlTaskDependencies(List *sqlTaskList);
static List * AssignTaskList(List *sqlTaskList); static List * AssignTaskList(List *sqlTaskList);
@ -167,7 +168,7 @@ static Task * GreedyAssignTask(WorkerNode *workerNode, List *taskList,
static List * RoundRobinAssignTaskList(List *taskList); static List * RoundRobinAssignTaskList(List *taskList);
static List * RoundRobinReorder(Task *task, List *placementList); static List * RoundRobinReorder(Task *task, List *placementList);
static List * ReorderAndAssignTaskList(List *taskList, static List * ReorderAndAssignTaskList(List *taskList,
List * (*reorderFunction) (Task *, List *)); List * (*reorderFunction)(Task *, List *));
static int CompareTasksByShardId(const void *leftElement, const void *rightElement); static int CompareTasksByShardId(const void *leftElement, const void *rightElement);
static List * ActiveShardPlacementLists(List *taskList); static List * ActiveShardPlacementLists(List *taskList);
static List * ActivePlacementList(List *placementList); static List * ActivePlacementList(List *placementList);
@ -309,6 +310,7 @@ BuildJobTree(MultiTreeRoot *multiTree)
partitionKey, partitionType, partitionKey, partitionType,
baseRelationId, baseRelationId,
JOIN_MAP_MERGE_JOB); JOIN_MAP_MERGE_JOB);
/* reset depended job list */ /* reset depended job list */
loopDependedJobList = NIL; loopDependedJobList = NIL;
loopDependedJobList = list_make1(mapMergeJob); loopDependedJobList = list_make1(mapMergeJob);
@ -538,7 +540,7 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList)
* If we are building this query on a repartitioned subquery job then we * If we are building this query on a repartitioned subquery job then we
* don't need to update column attributes. * don't need to update column attributes.
*/ */
if(dependedJobList != NIL) if (dependedJobList != NIL)
{ {
Job *job = (Job *) linitial(dependedJobList); Job *job = (Job *) linitial(dependedJobList);
if (CitusIsA(job, MapMergeJob)) if (CitusIsA(job, MapMergeJob))
@ -628,10 +630,10 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList)
jobQuery->rtable = rangeTableList; jobQuery->rtable = rangeTableList;
jobQuery->targetList = targetList; jobQuery->targetList = targetList;
jobQuery->jointree = joinTree; jobQuery->jointree = joinTree;
jobQuery->sortClause = sortClauseList; jobQuery->sortClause = sortClauseList;
jobQuery->groupClause = groupClauseList; jobQuery->groupClause = groupClauseList;
jobQuery->limitOffset = limitOffset; jobQuery->limitOffset = limitOffset;
jobQuery->limitCount = limitCount; jobQuery->limitCount = limitCount;
jobQuery->hasAggs = contain_agg_clause((Node *) targetList); jobQuery->hasAggs = contain_agg_clause((Node *) targetList);
return jobQuery; return jobQuery;
@ -718,10 +720,10 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependedJobList)
reduceQuery->rtable = derivedRangeTableList; reduceQuery->rtable = derivedRangeTableList;
reduceQuery->targetList = targetList; reduceQuery->targetList = targetList;
reduceQuery->jointree = joinTree; reduceQuery->jointree = joinTree;
reduceQuery->sortClause = extendedOpNode->sortClauseList; reduceQuery->sortClause = extendedOpNode->sortClauseList;
reduceQuery->groupClause = extendedOpNode->groupClauseList; reduceQuery->groupClause = extendedOpNode->groupClauseList;
reduceQuery->limitOffset = extendedOpNode->limitOffset; reduceQuery->limitOffset = extendedOpNode->limitOffset;
reduceQuery->limitCount = extendedOpNode->limitCount; reduceQuery->limitCount = extendedOpNode->limitCount;
reduceQuery->hasAggs = contain_agg_clause((Node *) targetList); reduceQuery->hasAggs = contain_agg_clause((Node *) targetList);
return reduceQuery; return reduceQuery;
@ -754,7 +756,7 @@ BaseRangeTableList(MultiNode *multiNode)
*/ */
MultiTable *multiTable = (MultiTable *) multiNode; MultiTable *multiTable = (MultiTable *) multiNode;
if (multiTable->relationId != SUBQUERY_RELATION_ID && if (multiTable->relationId != SUBQUERY_RELATION_ID &&
multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID) multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID)
{ {
RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry); RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry);
rangeTableEntry->inFromCl = true; rangeTableEntry->inFromCl = true;
@ -870,7 +872,7 @@ TargetEntryList(List *expressionList)
Expr *expression = (Expr *) lfirst(expressionCell); Expr *expression = (Expr *) lfirst(expressionCell);
TargetEntry *targetEntry = makeTargetEntry(expression, TargetEntry *targetEntry = makeTargetEntry(expression,
list_length(targetEntryList)+1, list_length(targetEntryList) + 1,
NULL, false); NULL, false);
targetEntryList = lappend(targetEntryList, targetEntry); targetEntryList = lappend(targetEntryList, targetEntry);
} }
@ -1044,7 +1046,7 @@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList
/* fix the column attributes in ON (...) clauses */ /* fix the column attributes in ON (...) clauses */
columnList = pull_var_clause_default((Node *) joinNode->joinClauseList); columnList = pull_var_clause_default((Node *) joinNode->joinClauseList);
foreach (columnCell, columnList) foreach(columnCell, columnList)
{ {
Var *column = (Var *) lfirst(columnCell); Var *column = (Var *) lfirst(columnCell);
UpdateColumnAttributes(column, *rangeTableList, dependedJobList); UpdateColumnAttributes(column, *rangeTableList, dependedJobList);
@ -1093,7 +1095,8 @@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList
uint32 columnCount = (uint32) list_length(dependedTargetList); uint32 columnCount = (uint32) list_length(dependedTargetList);
List *columnNameList = DerivedColumnNameList(columnCount, dependedJob->jobId); List *columnNameList = DerivedColumnNameList(columnCount, dependedJob->jobId);
RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode, columnNameList, RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode,
columnNameList,
tableIdList); tableIdList);
RangeTblRef *rangeTableRef = makeNode(RangeTblRef); RangeTblRef *rangeTableRef = makeNode(RangeTblRef);
@ -1246,7 +1249,7 @@ ExtractColumns(RangeTblEntry *rangeTableEntry, int rangeTableId, List *dependedJ
else if (rangeTableKind == CITUS_RTE_RELATION) else if (rangeTableKind == CITUS_RTE_RELATION)
{ {
/* /*
* For distributed tables, we construct a regular table RTE to call * For distributed tables, we construct a regular table RTE to call
* expandRTE, which will extract columns from the distributed table * expandRTE, which will extract columns from the distributed table
* schema. * schema.
*/ */
@ -1405,10 +1408,10 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
jobQuery->rtable = rangeTableList; jobQuery->rtable = rangeTableList;
jobQuery->targetList = targetList; jobQuery->targetList = targetList;
jobQuery->jointree = joinTree; jobQuery->jointree = joinTree;
jobQuery->sortClause = sortClauseList; jobQuery->sortClause = sortClauseList;
jobQuery->groupClause = groupClauseList; jobQuery->groupClause = groupClauseList;
jobQuery->limitOffset = limitOffset; jobQuery->limitOffset = limitOffset;
jobQuery->limitCount = limitCount; jobQuery->limitCount = limitCount;
jobQuery->hasAggs = contain_agg_clause((Node *) targetList); jobQuery->hasAggs = contain_agg_clause((Node *) targetList);
return jobQuery; return jobQuery;
@ -1646,7 +1649,7 @@ static uint64
UniqueJobId(void) UniqueJobId(void)
{ {
text *sequenceName = cstring_to_text(JOBID_SEQUENCE_NAME); text *sequenceName = cstring_to_text(JOBID_SEQUENCE_NAME);
Oid sequenceId = ResolveRelationId(sequenceName); Oid sequenceId = ResolveRelationId(sequenceName);
Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId); Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
/* generate new and unique jobId from sequence */ /* generate new and unique jobId from sequence */
@ -1747,7 +1750,7 @@ HashPartitionCount(void)
uint32 nodeCount = WorkerGetLiveNodeCount(); uint32 nodeCount = WorkerGetLiveNodeCount();
double maxReduceTasksPerNode = MaxRunningTasksPerNode / 2.0; double maxReduceTasksPerNode = MaxRunningTasksPerNode / 2.0;
uint32 partitionCount = (uint32) rint(nodeCount * maxReduceTasksPerNode); uint32 partitionCount = (uint32) rint(nodeCount * maxReduceTasksPerNode);
return partitionCount; return partitionCount;
} }
@ -1864,8 +1867,9 @@ SplitPointObject(ShardInterval **shardIntervalArray, uint32 shardIntervalCount)
return splitPointObject; return splitPointObject;
} }
/* ------------------------------------------------------------ /* ------------------------------------------------------------
* Functions that relate to building and assigning tasks follow * Functions that relate to building and assigning tasks follow
* ------------------------------------------------------------ * ------------------------------------------------------------
*/ */
@ -1986,7 +1990,7 @@ SubquerySqlTaskList(Job *job)
ListCell *rangeTableCell = NULL; ListCell *rangeTableCell = NULL;
ListCell *queryCell = NULL; ListCell *queryCell = NULL;
Node *whereClauseTree = NULL; Node *whereClauseTree = NULL;
uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */
uint32 anchorRangeTableId = 0; uint32 anchorRangeTableId = 0;
uint32 rangeTableIndex = 0; uint32 rangeTableIndex = 0;
const uint32 fragmentSize = sizeof(RangeTableFragment); const uint32 fragmentSize = sizeof(RangeTableFragment);
@ -2036,10 +2040,10 @@ SubquerySqlTaskList(Job *job)
if (opExpressionList != NIL) if (opExpressionList != NIL)
{ {
Var *partitionColumn = PartitionColumn(relationId, tableId); Var *partitionColumn = PartitionColumn(relationId, tableId);
List *whereClauseList = ReplaceColumnsInOpExpressionList(opExpressionList, List *whereClauseList = ReplaceColumnsInOpExpressionList(opExpressionList,
partitionColumn); partitionColumn);
finalShardIntervalList = PruneShardList(relationId, tableId, whereClauseList, finalShardIntervalList = PruneShardList(relationId, tableId, whereClauseList,
shardIntervalList); shardIntervalList);
} }
else else
{ {
@ -2146,7 +2150,7 @@ static List *
SqlTaskList(Job *job) SqlTaskList(Job *job)
{ {
List *sqlTaskList = NIL; List *sqlTaskList = NIL;
uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */
uint64 jobId = job->jobId; uint64 jobId = job->jobId;
bool anchorRangeTableBasedAssignment = false; bool anchorRangeTableBasedAssignment = false;
uint32 anchorRangeTableId = 0; uint32 anchorRangeTableId = 0;
@ -2472,8 +2476,8 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList,
List *shardIntervalList = LoadShardIntervalList(relationId); List *shardIntervalList = LoadShardIntervalList(relationId);
List *prunedShardIntervalList = PruneShardList(relationId, tableId, List *prunedShardIntervalList = PruneShardList(relationId, tableId,
whereClauseList, whereClauseList,
shardIntervalList); shardIntervalList);
/* /*
* If we prune all shards for one table, query results will be empty. * If we prune all shards for one table, query results will be empty.
@ -2548,7 +2552,7 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList,
*/ */
List * List *
PruneShardList(Oid relationId, Index tableId, List *whereClauseList, PruneShardList(Oid relationId, Index tableId, List *whereClauseList,
List *shardIntervalList) List *shardIntervalList)
{ {
List *remainingShardList = NIL; List *remainingShardList = NIL;
ListCell *shardIntervalCell = NULL; ListCell *shardIntervalCell = NULL;
@ -2653,7 +2657,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber)
Oid accessMethodId = BTREE_AM_OID; Oid accessMethodId = BTREE_AM_OID;
Oid operatorId = InvalidOid; Oid operatorId = InvalidOid;
Oid operatorClassInputType = InvalidOid; Oid operatorClassInputType = InvalidOid;
Const *constantValue = NULL; Const *constantValue = NULL;
OpExpr *expression = NULL; OpExpr *expression = NULL;
char typeType = 0; char typeType = 0;
@ -2679,7 +2683,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber)
/* Now make the expression with the given variable and a null constant */ /* Now make the expression with the given variable and a null constant */
expression = (OpExpr *) make_opclause(operatorId, expression = (OpExpr *) make_opclause(operatorId,
InvalidOid, /* no result type yet */ InvalidOid, /* no result type yet */
false, /* no return set */ false, /* no return set */
(Expr *) variable, (Expr *) variable,
(Expr *) constantValue, (Expr *) constantValue,
InvalidOid, collationId); InvalidOid, collationId);
@ -2808,6 +2812,10 @@ SimpleOpExpression(Expr *clause)
return false; /* not a binary opclause */ return false; /* not a binary opclause */
} }
/* strip coercions before doing check */
leftOperand = strip_implicit_coercions(leftOperand);
rightOperand = strip_implicit_coercions(rightOperand);
if (IsA(rightOperand, Const) && IsA(leftOperand, Var)) if (IsA(rightOperand, Const) && IsA(leftOperand, Var))
{ {
constantClause = (Const *) rightOperand; constantClause = (Const *) rightOperand;
@ -2896,7 +2904,7 @@ HashableClauseMutator(Node *originalNode, Var *partitionColumn)
* If this node is not hashable, continue walking down the expression tree * If this node is not hashable, continue walking down the expression tree
* to find and hash clauses which are eligible. * to find and hash clauses which are eligible.
*/ */
if(newNode == NULL) if (newNode == NULL)
{ {
newNode = expression_tree_mutator(originalNode, HashableClauseMutator, newNode = expression_tree_mutator(originalNode, HashableClauseMutator,
(void *) partitionColumn); (void *) partitionColumn);
@ -2919,6 +2927,10 @@ OpExpressionContainsColumn(OpExpr *operatorExpression, Var *partitionColumn)
Node *rightOperand = get_rightop((Expr *) operatorExpression); Node *rightOperand = get_rightop((Expr *) operatorExpression);
Var *column = NULL; Var *column = NULL;
/* strip coercions before doing check */
leftOperand = strip_implicit_coercions(leftOperand);
rightOperand = strip_implicit_coercions(rightOperand);
if (IsA(leftOperand, Var)) if (IsA(leftOperand, Var))
{ {
column = (Var *) leftOperand; column = (Var *) leftOperand;
@ -3037,7 +3049,7 @@ MakeInt4Constant(Datum constantValue)
bool constantIsNull = false; bool constantIsNull = false;
bool constantByValue = true; bool constantByValue = true;
Const *int4Constant = makeConst(constantType, constantTypeMode, constantCollationId, Const *int4Constant = makeConst(constantType, constantTypeMode, constantCollationId,
constantLength, constantValue, constantIsNull, constantLength, constantValue, constantIsNull,
constantByValue); constantByValue);
return int4Constant; return int4Constant;
@ -3094,7 +3106,7 @@ UpdateConstraint(Node *baseConstraint, ShardInterval *shardInterval)
Node *greaterThanExpr = (Node *) lsecond(andExpr->args); Node *greaterThanExpr = (Node *) lsecond(andExpr->args);
Node *minNode = get_rightop((Expr *) greaterThanExpr); /* right op */ Node *minNode = get_rightop((Expr *) greaterThanExpr); /* right op */
Node *maxNode = get_rightop((Expr *) lessThanExpr); /* right op */ Node *maxNode = get_rightop((Expr *) lessThanExpr); /* right op */
Const *minConstant = NULL; Const *minConstant = NULL;
Const *maxConstant = NULL; Const *maxConstant = NULL;
@ -3265,7 +3277,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended
joinSequenceArray[joinedTableCount].joiningRangeTableId = NON_PRUNABLE_JOIN; joinSequenceArray[joinedTableCount].joiningRangeTableId = NON_PRUNABLE_JOIN;
joinedTableCount++; joinedTableCount++;
foreach (joinExprCell, joinExprList) foreach(joinExprCell, joinExprList)
{ {
JoinExpr *joinExpr = (JoinExpr *) lfirst(joinExprCell); JoinExpr *joinExpr = (JoinExpr *) lfirst(joinExprCell);
JoinType joinType = joinExpr->jointype; JoinType joinType = joinExpr->jointype;
@ -3339,7 +3351,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended
if (IS_OUTER_JOIN(joinType)) if (IS_OUTER_JOIN(joinType))
{ {
int innerRangeTableId = 0; int innerRangeTableId = 0;
List * tableFragments = NIL; List *tableFragments = NIL;
int fragmentCount = 0; int fragmentCount = 0;
if (joinType == JOIN_RIGHT) if (joinType == JOIN_RIGHT)
@ -3492,7 +3504,7 @@ FindRangeTableFragmentsList(List *rangeTableFragmentsList, int tableId)
if (tableFragments != NIL) if (tableFragments != NIL)
{ {
RangeTableFragment *tableFragment = RangeTableFragment *tableFragment =
(RangeTableFragment*) linitial(tableFragments); (RangeTableFragment *) linitial(tableFragments);
if (tableFragment->rangeTableId == tableId) if (tableFragment->rangeTableId == tableId)
{ {
foundTableFragments = tableFragments; foundTableFragments = tableFragments;
@ -3698,7 +3710,7 @@ UniqueFragmentList(List *fragmentList)
foreach(uniqueFragmentCell, uniqueFragmentList) foreach(uniqueFragmentCell, uniqueFragmentList)
{ {
RangeTableFragment *uniqueFragment = RangeTableFragment *uniqueFragment =
(RangeTableFragment *) lfirst(uniqueFragmentCell); (RangeTableFragment *) lfirst(uniqueFragmentCell);
uint64 *uniqueShardId = uniqueFragment->fragmentReference; uint64 *uniqueShardId = uniqueFragment->fragmentReference;
if (*shardId == *uniqueShardId) if (*shardId == *uniqueShardId)
@ -4038,6 +4050,7 @@ FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment)
return alias; return alias;
} }
/* /*
* AnchorShardId walks over each fragment in the given fragment list, finds the * AnchorShardId walks over each fragment in the given fragment list, finds the
* fragment that corresponds to the given anchor range tableId, and returns this * fragment that corresponds to the given anchor range tableId, and returns this
@ -4352,7 +4365,7 @@ MergeTaskList(MapMergeJob *mapMergeJob, List *mapTaskList, uint32 taskIdIndex)
StringInfo intermediateTableQueryString = StringInfo intermediateTableQueryString =
IntermediateTableQueryString(jobId, taskIdIndex, reduceQuery); IntermediateTableQueryString(jobId, taskIdIndex, reduceQuery);
StringInfo mergeAndRunQueryString= makeStringInfo(); StringInfo mergeAndRunQueryString = makeStringInfo();
appendStringInfo(mergeAndRunQueryString, MERGE_FILES_AND_RUN_QUERY_COMMAND, appendStringInfo(mergeAndRunQueryString, MERGE_FILES_AND_RUN_QUERY_COMMAND,
jobId, taskIdIndex, mergeTableQueryString->data, jobId, taskIdIndex, mergeTableQueryString->data,
intermediateTableQueryString->data); intermediateTableQueryString->data);
@ -4678,7 +4691,7 @@ TaskListAppendUnique(List *list, Task *task)
List * List *
TaskListConcatUnique(List *list1, List *list2) TaskListConcatUnique(List *list1, List *list2)
{ {
ListCell *taskCell = NULL; ListCell *taskCell = NULL;
foreach(taskCell, list2) foreach(taskCell, list2)
{ {
@ -4952,7 +4965,7 @@ List *
FirstReplicaAssignTaskList(List *taskList) FirstReplicaAssignTaskList(List *taskList)
{ {
/* No additional reordering need take place for this algorithm */ /* No additional reordering need take place for this algorithm */
List * (*reorderFunction)(Task *, List *) = NULL; List *(*reorderFunction)(Task *, List *) = NULL;
taskList = ReorderAndAssignTaskList(taskList, reorderFunction); taskList = ReorderAndAssignTaskList(taskList, reorderFunction);
@ -4976,6 +4989,7 @@ RoundRobinAssignTaskList(List *taskList)
return taskList; return taskList;
} }
/* /*
* RoundRobinReorder implements the core of the round-robin assignment policy. * RoundRobinReorder implements the core of the round-robin assignment policy.
* It takes a task and placement list and rotates a copy of the placement list * It takes a task and placement list and rotates a copy of the placement list
@ -5108,7 +5122,8 @@ ActiveShardPlacementLists(List *taskList)
List *activeShardPlacementList = ActivePlacementList(shardPlacementList); List *activeShardPlacementList = ActivePlacementList(shardPlacementList);
/* sort shard placements by their insertion time */ /* sort shard placements by their insertion time */
activeShardPlacementList = SortList(activeShardPlacementList, CompareShardPlacements); activeShardPlacementList = SortList(activeShardPlacementList,
CompareShardPlacements);
shardPlacementLists = lappend(shardPlacementLists, activeShardPlacementList); shardPlacementLists = lappend(shardPlacementLists, activeShardPlacementList);
} }
@ -5249,7 +5264,8 @@ AssignDualHashTaskList(List *taskList)
uint32 replicaIndex = 0; uint32 replicaIndex = 0;
for (replicaIndex = 0; replicaIndex < ShardReplicationFactor; replicaIndex++) for (replicaIndex = 0; replicaIndex < ShardReplicationFactor; replicaIndex++)
{ {
uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex + replicaIndex; uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex +
replicaIndex;
uint32 assignmentIndex = assignmentOffset % workerNodeCount; uint32 assignmentIndex = assignmentOffset % workerNodeCount;
WorkerNode *workerNode = list_nth(workerNodeList, assignmentIndex); WorkerNode *workerNode = list_nth(workerNodeList, assignmentIndex);

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_planner.c * multi_planner.c
* General CitusDB planner code. * General Citus planner code.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -45,26 +45,17 @@ multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
PlannedStmt *result = NULL; PlannedStmt *result = NULL;
/* /*
* First call into standard planner. This is required because the CitusDB * First call into standard planner. This is required because the Citus
* planner relies on parse tree transformations made by postgres' planner. * planner relies on parse tree transformations made by postgres' planner.
*/ */
result = standard_planner(parse, cursorOptions, boundParams); result = standard_planner(parse, cursorOptions, boundParams);
if (NeedsDistributedPlanning(parse)) if (NeedsDistributedPlanning(parse))
{ {
MemoryContext oldcontext = NULL; MultiPlan *physicalPlan = CreatePhysicalPlan(parse);
MultiPlan *physicalPlan = NULL;
/* Switch to top level message context */
oldcontext = MemoryContextSwitchTo(MessageContext);
physicalPlan = CreatePhysicalPlan(parse);
/* store required data into the planned statement */ /* store required data into the planned statement */
result = MultiQueryContainerNode(result, physicalPlan); result = MultiQueryContainerNode(result, physicalPlan);
/* Now switch back to original context */
MemoryContextSwitchTo(oldcontext);
} }
return result; return result;
@ -99,7 +90,7 @@ CreatePhysicalPlan(Query *parse)
/* /*
* This check is here to make it likely that all node types used in * This check is here to make it likely that all node types used in
* CitusDB are dumpable. Explain can dump logical and physical plans * Citus are dumpable. Explain can dump logical and physical plans
* using the extended outfuncs infrastructure, but it's infeasible to * using the extended outfuncs infrastructure, but it's infeasible to
* test most plans. MultiQueryContainerNode always serializes the * test most plans. MultiQueryContainerNode always serializes the
* physical plan, so there's no need to check that separately. * physical plan, so there's no need to check that separately.
@ -141,7 +132,7 @@ HasCitusToplevelNode(PlannedStmt *result)
* yet. Directly return false, part of the required infrastructure for * yet. Directly return false, part of the required infrastructure for
* further checks might not be present. * further checks might not be present.
*/ */
if (!CitusDBHasBeenLoaded()) if (!CitusHasBeenLoaded())
{ {
return false; return false;
} }
@ -268,7 +259,7 @@ GetMultiPlanString(PlannedStmt *result)
if (list_length(fauxFuncExpr->args) != 1) if (list_length(fauxFuncExpr->args) != 1)
{ {
ereport(ERROR, (errmsg("unexpected number of function arguments to " ereport(ERROR, (errmsg("unexpected number of function arguments to "
"citusdb_extradata_container"))); "citus_extradata_container")));
} }
multiPlanData = (Const *) linitial(fauxFuncExpr->args); multiPlanData = (Const *) linitial(fauxFuncExpr->args);

View File

@ -35,7 +35,7 @@
/* Local functions forward declarations */ /* Local functions forward declarations */
static bool TypeAddIndexConstraint(const AlterTableCmd *command); static bool TypeAddIndexConstraint(const AlterTableCmd *command);
static bool TypeDropIndexConstraint(const AlterTableCmd *command, static bool TypeDropIndexConstraint(const AlterTableCmd *command,
const RangeVar *relation, uint64 shardId); const RangeVar *relation, uint64 shardId);
static void AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId); static void AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId);
@ -67,7 +67,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
AppendShardIdToName(sequenceName, shardId); AppendShardIdToName(sequenceName, shardId);
break; break;
} }
case T_AlterTableStmt: case T_AlterTableStmt:
{ {
/* /*
@ -79,7 +79,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
AlterTableStmt *alterTableStmt = (AlterTableStmt *) parseTree; AlterTableStmt *alterTableStmt = (AlterTableStmt *) parseTree;
char **relationName = &(alterTableStmt->relation->relname); char **relationName = &(alterTableStmt->relation->relname);
RangeVar *relation = alterTableStmt->relation; /* for constraints */ RangeVar *relation = alterTableStmt->relation; /* for constraints */
List *commandList = alterTableStmt->cmds; List *commandList = alterTableStmt->cmds;
ListCell *commandCell = NULL; ListCell *commandCell = NULL;
@ -179,15 +179,15 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
objectType == OBJECT_INDEX || objectType == OBJECT_FOREIGN_TABLE || objectType == OBJECT_INDEX || objectType == OBJECT_FOREIGN_TABLE ||
objectType == OBJECT_FOREIGN_SERVER) objectType == OBJECT_FOREIGN_SERVER)
{ {
List *relationNameList = NULL; List *relationNameList = NULL;
int relationNameListLength = 0; int relationNameListLength = 0;
Value *relationNameValue = NULL; Value *relationNameValue = NULL;
char **relationName = NULL; char **relationName = NULL;
uint32 dropCount = list_length(dropStmt->objects); uint32 dropCount = list_length(dropStmt->objects);
if (dropCount > 1) if (dropCount > 1)
{ {
ereport(ERROR, ereport(ERROR,
(errmsg("cannot extend name for multiple drop objects"))); (errmsg("cannot extend name for multiple drop objects")));
} }
@ -205,19 +205,30 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
switch (relationNameListLength) switch (relationNameListLength)
{ {
case 1: case 1:
{
relationNameValue = linitial(relationNameList); relationNameValue = linitial(relationNameList);
break; break;
}
case 2: case 2:
{
relationNameValue = lsecond(relationNameList); relationNameValue = lsecond(relationNameList);
break; break;
}
case 3: case 3:
{
relationNameValue = lthird(relationNameList); relationNameValue = lthird(relationNameList);
break; break;
}
default: default:
{
ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("improper relation name: \"%s\"", errmsg("improper relation name: \"%s\"",
NameListToString(relationNameList)))); NameListToString(relationNameList))));
break; break;
}
} }
relationName = &(relationNameValue->val.str); relationName = &(relationNameValue->val.str);
@ -304,7 +315,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
{ {
RenameStmt *renameStmt = (RenameStmt *) parseTree; RenameStmt *renameStmt = (RenameStmt *) parseTree;
ObjectType objectType = renameStmt->renameType; ObjectType objectType = renameStmt->renameType;
if (objectType == OBJECT_TABLE || objectType == OBJECT_SEQUENCE || if (objectType == OBJECT_TABLE || objectType == OBJECT_SEQUENCE ||
objectType == OBJECT_INDEX) objectType == OBJECT_INDEX)
{ {
@ -335,7 +346,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
* We currently do not support truncate statements. This is * We currently do not support truncate statements. This is
* primarily because truncates allow implicit modifications to * primarily because truncates allow implicit modifications to
* sequences through table column dependencies. As we have not * sequences through table column dependencies. As we have not
* determined our dependency model for sequences, we error here. * determined our dependency model for sequences, we error here.
*/ */
ereport(ERROR, (errmsg("cannot extend name for truncate statement"))); ereport(ERROR, (errmsg("cannot extend name for truncate statement")));
break; break;
@ -384,18 +395,18 @@ TypeAddIndexConstraint(const AlterTableCmd *command)
* associated with an index. * associated with an index.
*/ */
static bool static bool
TypeDropIndexConstraint(const AlterTableCmd *command, TypeDropIndexConstraint(const AlterTableCmd *command,
const RangeVar *relation, uint64 shardId) const RangeVar *relation, uint64 shardId)
{ {
Relation pgConstraint = NULL; Relation pgConstraint = NULL;
SysScanDesc scanDescriptor = NULL; SysScanDesc scanDescriptor = NULL;
ScanKeyData scanKey[1]; ScanKeyData scanKey[1];
int scanKeyCount = 1; int scanKeyCount = 1;
HeapTuple heapTuple = NULL; HeapTuple heapTuple = NULL;
char *searchedConstraintName = NULL; char *searchedConstraintName = NULL;
bool indexConstraint = false; bool indexConstraint = false;
Oid relationId = InvalidOid; Oid relationId = InvalidOid;
bool failOK = true; bool failOK = true;
if (command->subtype != AT_DropConstraint) if (command->subtype != AT_DropConstraint)
@ -423,8 +434,8 @@ TypeDropIndexConstraint(const AlterTableCmd *command,
ScanKeyInit(&scanKey[0], Anum_pg_constraint_conrelid, ScanKeyInit(&scanKey[0], Anum_pg_constraint_conrelid,
BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId)); BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));
scanDescriptor = systable_beginscan(pgConstraint, scanDescriptor = systable_beginscan(pgConstraint,
ConstraintRelidIndexId, true, /* indexOK */ ConstraintRelidIndexId, true, /* indexOK */
NULL, scanKeyCount, scanKey); NULL, scanKeyCount, scanKey);
@ -433,7 +444,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command,
{ {
Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(heapTuple); Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(heapTuple);
char *constraintName = NameStr(constraintForm->conname); char *constraintName = NameStr(constraintForm->conname);
if (strncmp(constraintName, searchedConstraintName, NAMEDATALEN) == 0) if (strncmp(constraintName, searchedConstraintName, NAMEDATALEN) == 0)
{ {
/* we found the constraint, now check if it is for an index */ /* we found the constraint, now check if it is for an index */
@ -442,7 +453,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command,
{ {
indexConstraint = true; indexConstraint = true;
} }
break; break;
} }
@ -451,7 +462,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command,
systable_endscan(scanDescriptor); systable_endscan(scanDescriptor);
heap_close(pgConstraint, AccessShareLock); heap_close(pgConstraint, AccessShareLock);
pfree(searchedConstraintName); pfree(searchedConstraintName);
return indexConstraint; return indexConstraint;
@ -489,10 +500,10 @@ AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId)
void void
AppendShardIdToName(char **name, uint64 shardId) AppendShardIdToName(char **name, uint64 shardId)
{ {
char extendedName[NAMEDATALEN]; char extendedName[NAMEDATALEN];
uint32 extendedNameLength = 0; uint32 extendedNameLength = 0;
snprintf(extendedName, NAMEDATALEN, "%s%c" UINT64_FORMAT, snprintf(extendedName, NAMEDATALEN, "%s%c" UINT64_FORMAT,
(*name), SHARD_NAME_SEPARATOR, shardId); (*name), SHARD_NAME_SEPARATOR, shardId);
/* /*

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* shared_library_init.c * shared_library_init.c
* Initialize CitusDB extension * Initialize Citus extension
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -48,23 +48,23 @@ static void NormalizeWorkerListPath(void);
/* GUC enum definitions */ /* GUC enum definitions */
static const struct config_enum_entry task_assignment_policy_options[] = { static const struct config_enum_entry task_assignment_policy_options[] = {
{"greedy", TASK_ASSIGNMENT_GREEDY, false}, { "greedy", TASK_ASSIGNMENT_GREEDY, false },
{"first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false}, { "first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false },
{"round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false}, { "round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false },
{NULL, 0, false} { NULL, 0, false }
}; };
static const struct config_enum_entry task_executor_type_options[] = { static const struct config_enum_entry task_executor_type_options[] = {
{"real-time", MULTI_EXECUTOR_REAL_TIME, false}, { "real-time", MULTI_EXECUTOR_REAL_TIME, false },
{"task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false}, { "task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false },
{"router", MULTI_EXECUTOR_ROUTER, false}, { "router", MULTI_EXECUTOR_ROUTER, false },
{NULL, 0, false} { NULL, 0, false }
}; };
static const struct config_enum_entry shard_placement_policy_options[] = { static const struct config_enum_entry shard_placement_policy_options[] = {
{"local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false}, { "local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false },
{"round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false}, { "round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false },
{NULL, 0, false} { NULL, 0, false }
}; };
@ -74,8 +74,8 @@ _PG_init(void)
{ {
if (!process_shared_preload_libraries_in_progress) if (!process_shared_preload_libraries_in_progress)
{ {
ereport(ERROR, (errmsg("CitusDB can only be loaded via shared_preload_libraries"), ereport(ERROR, (errmsg("Citus can only be loaded via shared_preload_libraries"),
errhint("Add citusdb to shared_preload_libraries."))); errhint("Add citus to shared_preload_libraries.")));
} }
/* /*
@ -95,8 +95,8 @@ _PG_init(void)
ExecutorEnd_hook != NULL || ExecutorEnd_hook != NULL ||
ProcessUtility_hook != NULL) ProcessUtility_hook != NULL)
{ {
ereport(ERROR, (errmsg("CitusDB has to be loaded first"), ereport(ERROR, (errmsg("Citus has to be loaded first"),
errhint("Place citusdb at the beginning of " errhint("Place citus at the beginning of "
"shared_preload_libraries."))); "shared_preload_libraries.")));
} }
@ -107,7 +107,7 @@ _PG_init(void)
CreateRequiredDirectories(); CreateRequiredDirectories();
/* /*
* Register CitusDB configuration variables. Do so before intercepting * Register Citus configuration variables. Do so before intercepting
* hooks or calling initialization functions, in case we want to do the * hooks or calling initialization functions, in case we want to do the
* latter in a configuration dependent manner. * latter in a configuration dependent manner.
*/ */
@ -137,7 +137,7 @@ _PG_init(void)
/* /*
* CreateRequiredDirectories - Create directories required for CitusDB to * CreateRequiredDirectories - Create directories required for Citus to
* function. * function.
* *
* These used to be created by initdb, but that's not possible anymore. * These used to be created by initdb, but that's not possible anymore.
@ -166,12 +166,12 @@ CreateRequiredDirectories(void)
} }
/* Register CitusDB configuration variables. */ /* Register Citus configuration variables. */
static void static void
RegisterCitusConfigVariables(void) RegisterCitusConfigVariables(void)
{ {
DefineCustomStringVariable( DefineCustomStringVariable(
"citusdb.worker_list_file", "citus.worker_list_file",
gettext_noop("Sets the server's \"worker_list\" configuration file."), gettext_noop("Sets the server's \"worker_list\" configuration file."),
NULL, NULL,
&WorkerListFileName, &WorkerListFileName,
@ -182,7 +182,7 @@ RegisterCitusConfigVariables(void)
NormalizeWorkerListPath(); NormalizeWorkerListPath();
DefineCustomBoolVariable( DefineCustomBoolVariable(
"citusdb.binary_master_copy_format", "citus.binary_master_copy_format",
gettext_noop("Use the binary master copy format."), gettext_noop("Use the binary master copy format."),
gettext_noop("When enabled, data is copied from workers to the master " gettext_noop("When enabled, data is copied from workers to the master "
"in PostgreSQL's binary serialization format."), "in PostgreSQL's binary serialization format."),
@ -193,7 +193,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomBoolVariable( DefineCustomBoolVariable(
"citusdb.binary_worker_copy_format", "citus.binary_worker_copy_format",
gettext_noop("Use the binary worker copy format."), gettext_noop("Use the binary worker copy format."),
gettext_noop("When enabled, data is copied from workers to workers " gettext_noop("When enabled, data is copied from workers to workers "
"in PostgreSQL's binary serialization format when " "in PostgreSQL's binary serialization format when "
@ -205,10 +205,11 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomBoolVariable( DefineCustomBoolVariable(
"citusdb.expire_cached_shards", "citus.expire_cached_shards",
gettext_noop("Enables shard cache expiration if a shard's size on disk has changed. "), gettext_noop("Enables shard cache expiration if a shard's size on disk has "
gettext_noop("When appending to an existing shard, old data may still be cached on " "changed."),
"other workers. This configuration entry activates automatic " gettext_noop("When appending to an existing shard, old data may still be cached "
"on other workers. This configuration entry activates automatic "
"expiration, but should not be used with manual updates to shards."), "expiration, but should not be used with manual updates to shards."),
&ExpireCachedShards, &ExpireCachedShards,
false, false,
@ -217,7 +218,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomBoolVariable( DefineCustomBoolVariable(
"citusdb.subquery_pushdown", "citus.subquery_pushdown",
gettext_noop("Enables supported subquery pushdown to workers."), gettext_noop("Enables supported subquery pushdown to workers."),
NULL, NULL,
&SubqueryPushdown, &SubqueryPushdown,
@ -227,7 +228,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomBoolVariable( DefineCustomBoolVariable(
"citusdb.log_multi_join_order", "citus.log_multi_join_order",
gettext_noop("Logs the distributed join order to the server log."), gettext_noop("Logs the distributed join order to the server log."),
gettext_noop("We use this private configuration entry as a debugging aid. " gettext_noop("We use this private configuration entry as a debugging aid. "
"If enabled, we print the distributed join order."), "If enabled, we print the distributed join order."),
@ -238,7 +239,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomBoolVariable( DefineCustomBoolVariable(
"citusdb.explain_multi_logical_plan", "citus.explain_multi_logical_plan",
gettext_noop("Enables Explain to print out distributed logical plans."), gettext_noop("Enables Explain to print out distributed logical plans."),
gettext_noop("We use this private configuration entry as a debugging aid. " gettext_noop("We use this private configuration entry as a debugging aid. "
"If enabled, the Explain command prints out the optimized " "If enabled, the Explain command prints out the optimized "
@ -250,7 +251,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomBoolVariable( DefineCustomBoolVariable(
"citusdb.explain_multi_physical_plan", "citus.explain_multi_physical_plan",
gettext_noop("Enables Explain to print out distributed physical plans."), gettext_noop("Enables Explain to print out distributed physical plans."),
gettext_noop("We use this private configuration entry as a debugging aid. " gettext_noop("We use this private configuration entry as a debugging aid. "
"If enabled, the Explain command prints out the physical " "If enabled, the Explain command prints out the physical "
@ -262,7 +263,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomBoolVariable( DefineCustomBoolVariable(
"citusdb.all_modifications_commutative", "citus.all_modifications_commutative",
gettext_noop("Bypasses commutativity checks when enabled"), gettext_noop("Bypasses commutativity checks when enabled"),
NULL, NULL,
&AllModificationsCommutative, &AllModificationsCommutative,
@ -272,7 +273,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.shard_replication_factor", "citus.shard_replication_factor",
gettext_noop("Sets the replication factor for shards."), gettext_noop("Sets the replication factor for shards."),
gettext_noop("Shards are replicated across nodes according to this " gettext_noop("Shards are replicated across nodes according to this "
"replication factor. Note that shards read this " "replication factor. Note that shards read this "
@ -285,7 +286,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.shard_max_size", "citus.shard_max_size",
gettext_noop("Sets the maximum size a shard will grow before it gets split."), gettext_noop("Sets the maximum size a shard will grow before it gets split."),
gettext_noop("Shards store table and file data. When the source " gettext_noop("Shards store table and file data. When the source "
"file's size for one shard exceeds this configuration " "file's size for one shard exceeds this configuration "
@ -300,7 +301,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.max_worker_nodes_tracked", "citus.max_worker_nodes_tracked",
gettext_noop("Sets the maximum number of worker nodes that are tracked."), gettext_noop("Sets the maximum number of worker nodes that are tracked."),
gettext_noop("Worker nodes' network locations, their membership and " gettext_noop("Worker nodes' network locations, their membership and "
"health status are tracked in a shared hash table on " "health status are tracked in a shared hash table on "
@ -314,7 +315,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.remote_task_check_interval", "citus.remote_task_check_interval",
gettext_noop("Sets the frequency at which we check job statuses."), gettext_noop("Sets the frequency at which we check job statuses."),
gettext_noop("The master node assigns tasks to workers nodes, and " gettext_noop("The master node assigns tasks to workers nodes, and "
"then regularly checks with them about each task's " "then regularly checks with them about each task's "
@ -327,7 +328,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.task_tracker_delay", "citus.task_tracker_delay",
gettext_noop("Task tracker sleep time between task management rounds."), gettext_noop("Task tracker sleep time between task management rounds."),
gettext_noop("The task tracker process wakes up regularly, walks over " gettext_noop("The task tracker process wakes up regularly, walks over "
"all tasks assigned to it, and schedules and executes these " "all tasks assigned to it, and schedules and executes these "
@ -341,7 +342,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.max_assign_task_batch_size", "citus.max_assign_task_batch_size",
gettext_noop("Sets the maximum number of tasks to assign per round."), gettext_noop("Sets the maximum number of tasks to assign per round."),
gettext_noop("The master node synchronously assigns tasks to workers in " gettext_noop("The master node synchronously assigns tasks to workers in "
"batches. Bigger batches allow for faster task assignment, " "batches. Bigger batches allow for faster task assignment, "
@ -355,7 +356,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.max_tracked_tasks_per_node", "citus.max_tracked_tasks_per_node",
gettext_noop("Sets the maximum number of tracked tasks per node."), gettext_noop("Sets the maximum number of tracked tasks per node."),
gettext_noop("The task tracker processes keeps all assigned tasks in " gettext_noop("The task tracker processes keeps all assigned tasks in "
"a shared hash table, and schedules and executes these " "a shared hash table, and schedules and executes these "
@ -369,7 +370,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.max_running_tasks_per_node", "citus.max_running_tasks_per_node",
gettext_noop("Sets the maximum number of tasks to run concurrently per node."), gettext_noop("Sets the maximum number of tasks to run concurrently per node."),
gettext_noop("The task tracker process schedules and executes the tasks " gettext_noop("The task tracker process schedules and executes the tasks "
"assigned to it as appropriate. This configuration value " "assigned to it as appropriate. This configuration value "
@ -382,7 +383,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.partition_buffer_size", "citus.partition_buffer_size",
gettext_noop("Sets the buffer size to use for partition operations."), gettext_noop("Sets the buffer size to use for partition operations."),
gettext_noop("Worker nodes allow for table data to be repartitioned " gettext_noop("Worker nodes allow for table data to be repartitioned "
"into multiple text files, much like Hadoop's Map " "into multiple text files, much like Hadoop's Map "
@ -396,7 +397,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.large_table_shard_count", "citus.large_table_shard_count",
gettext_noop("The shard count threshold over which a table is considered large."), gettext_noop("The shard count threshold over which a table is considered large."),
gettext_noop("A distributed table is considered to be large if it has " gettext_noop("A distributed table is considered to be large if it has "
"more shards than the value specified here. This largeness " "more shards than the value specified here. This largeness "
@ -409,7 +410,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable( DefineCustomIntVariable(
"citusdb.limit_clause_row_fetch_count", "citus.limit_clause_row_fetch_count",
gettext_noop("Number of rows to fetch per task for limit clause optimization."), gettext_noop("Number of rows to fetch per task for limit clause optimization."),
gettext_noop("Select queries get partitioned and executed as smaller " gettext_noop("Select queries get partitioned and executed as smaller "
"tasks. In some cases, select queries with limit clauses " "tasks. In some cases, select queries with limit clauses "
@ -424,7 +425,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomRealVariable( DefineCustomRealVariable(
"citusdb.count_distinct_error_rate", "citus.count_distinct_error_rate",
gettext_noop("Desired error rate when calculating count(distinct) " gettext_noop("Desired error rate when calculating count(distinct) "
"approximates using the postgresql-hll extension. " "approximates using the postgresql-hll extension. "
"0.0 disables approximations for count(distinct); 1.0 " "0.0 disables approximations for count(distinct); 1.0 "
@ -437,14 +438,14 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomEnumVariable( DefineCustomEnumVariable(
"citusdb.task_assignment_policy", "citus.task_assignment_policy",
gettext_noop("Sets the policy to use when assigning tasks to worker nodes."), gettext_noop("Sets the policy to use when assigning tasks to worker nodes."),
gettext_noop("The master node assigns tasks to worker nodes based on shard " gettext_noop("The master node assigns tasks to worker nodes based on shard "
"locations. This configuration value specifies the policy to " "locations. This configuration value specifies the policy to "
"use when making these assignments. The greedy policy aims to " "use when making these assignments. The greedy policy aims to "
"evenly distribute tasks across worker nodes, first-replica just " "evenly distribute tasks across worker nodes, first-replica just "
"assigns tasks in the order shard placements were created, " "assigns tasks in the order shard placements were created, "
"and the round-robin policy assigns tasks to worker nodes in " "and the round-robin policy assigns tasks to worker nodes in "
"a round-robin fashion."), "a round-robin fashion."),
&TaskAssignmentPolicy, &TaskAssignmentPolicy,
TASK_ASSIGNMENT_GREEDY, TASK_ASSIGNMENT_GREEDY,
@ -454,7 +455,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomEnumVariable( DefineCustomEnumVariable(
"citusdb.task_executor_type", "citus.task_executor_type",
gettext_noop("Sets the executor type to be used for distributed queries."), gettext_noop("Sets the executor type to be used for distributed queries."),
gettext_noop("The master node chooses between three different executor types " gettext_noop("The master node chooses between three different executor types "
"when executing a distributed query. The router executor is " "when executing a distributed query. The router executor is "
@ -472,7 +473,7 @@ RegisterCitusConfigVariables(void)
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomEnumVariable( DefineCustomEnumVariable(
"citusdb.shard_placement_policy", "citus.shard_placement_policy",
gettext_noop("Sets the policy to use when choosing nodes for shard placement."), gettext_noop("Sets the policy to use when choosing nodes for shard placement."),
gettext_noop("The master node chooses which worker nodes to place new shards " gettext_noop("The master node chooses which worker nodes to place new shards "
"on. This configuration value specifies the policy to use when " "on. This configuration value specifies the policy to use when "
@ -486,16 +487,14 @@ RegisterCitusConfigVariables(void)
0, 0,
NULL, NULL, NULL); NULL, NULL, NULL);
/* warn about config items in the citusdb namespace that are not registered above */ /* warn about config items in the citus namespace that are not registered above */
EmitWarningsOnPlaceholders("citusdb");
/* Also warn about citus namespace, as that's a very likely misspelling */
EmitWarningsOnPlaceholders("citus"); EmitWarningsOnPlaceholders("citus");
} }
/* /*
* NormalizeWorkerListPath converts the path configured via * NormalizeWorkerListPath converts the path configured via
* citusdb.worker_list_file into an absolute path, falling back to the default * citus.worker_list_file into an absolute path, falling back to the default
* value if necessary. The previous value of the config variable is * value if necessary. The previous value of the config variable is
* overwritten with the normalized value. * overwritten with the normalized value.
* *
@ -515,8 +514,10 @@ NormalizeWorkerListPath(void)
{ {
absoluteFileName = malloc(strlen(DataDir) + strlen(WORKER_LIST_FILENAME) + 2); absoluteFileName = malloc(strlen(DataDir) + strlen(WORKER_LIST_FILENAME) + 2);
if (absoluteFileName == NULL) if (absoluteFileName == NULL)
{
ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory"))); errmsg("out of memory")));
}
sprintf(absoluteFileName, "%s/%s", DataDir, WORKER_LIST_FILENAME); sprintf(absoluteFileName, "%s/%s", DataDir, WORKER_LIST_FILENAME);
} }
@ -525,11 +526,12 @@ NormalizeWorkerListPath(void)
ereport(FATAL, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ereport(FATAL, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("%s does not know where to find the \"worker_list_file\" " errmsg("%s does not know where to find the \"worker_list_file\" "
"configuration file.\n" "configuration file.\n"
"This can be specified as \"citusdb.worker_list_file\" in " "This can be specified as \"citus.worker_list_file\" in "
"\"%s\", or by the -D invocation option, or by the PGDATA " "\"%s\", or by the -D invocation option, or by the PGDATA "
"environment variable.\n", progname, ConfigFileName))); "environment variable.\n", progname, ConfigFileName)));
} }
SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER, PGC_S_OVERRIDE); SetConfigOption("citus.worker_list_file", absoluteFileName, PGC_POSTMASTER,
PGC_S_OVERRIDE);
free(absoluteFileName); free(absoluteFileName);
} }

View File

@ -2,7 +2,7 @@
* *
* test/src/connection_cache.c * test/src/connection_cache.c
* *
* This file contains functions to exercise CitusDB's connection hash * This file contains functions to exercise Citus's connection hash
* functionality for purposes of unit testing. * functionality for purposes of unit testing.
* *
* Copyright (c) 2014-2015, Citus Data, Inc. * Copyright (c) 2014-2015, Citus Data, Inc.
@ -48,7 +48,7 @@ initialize_remote_temp_table(PG_FUNCTION_ARGS)
int32 nodePort = PG_GETARG_INT32(1); int32 nodePort = PG_GETARG_INT32(1);
PGresult *result = NULL; PGresult *result = NULL;
PGconn *connection = GetConnection(nodeName, nodePort); PGconn *connection = GetOrEstablishConnection(nodeName, nodePort);
if (connection == NULL) if (connection == NULL)
{ {
PG_RETURN_BOOL(false); PG_RETURN_BOOL(false);
@ -79,7 +79,7 @@ count_remote_temp_table_rows(PG_FUNCTION_ARGS)
Datum count = Int32GetDatum(-1); Datum count = Int32GetDatum(-1);
PGresult *result = NULL; PGresult *result = NULL;
PGconn *connection = GetConnection(nodeName, nodePort); PGconn *connection = GetOrEstablishConnection(nodeName, nodePort);
if (connection == NULL) if (connection == NULL)
{ {
PG_RETURN_DATUM(count); PG_RETURN_DATUM(count);
@ -114,7 +114,7 @@ get_and_purge_connection(PG_FUNCTION_ARGS)
char *nodeName = PG_GETARG_CSTRING(0); char *nodeName = PG_GETARG_CSTRING(0);
int32 nodePort = PG_GETARG_INT32(1); int32 nodePort = PG_GETARG_INT32(1);
PGconn *connection = GetConnection(nodeName, nodePort); PGconn *connection = GetOrEstablishConnection(nodeName, nodePort);
if (connection == NULL) if (connection == NULL)
{ {
PG_RETURN_BOOL(false); PG_RETURN_BOOL(false);
@ -136,7 +136,7 @@ set_connection_status_bad(PG_FUNCTION_ARGS)
char *nodeName = PG_GETARG_CSTRING(0); char *nodeName = PG_GETARG_CSTRING(0);
int32 nodePort = PG_GETARG_INT32(1); int32 nodePort = PG_GETARG_INT32(1);
PGconn *connection = GetConnection(nodeName, nodePort); PGconn *connection = GetOrEstablishConnection(nodeName, nodePort);
if (connection == NULL) if (connection == NULL)
{ {
PG_RETURN_BOOL(false); PG_RETURN_BOOL(false);

View File

@ -3,7 +3,7 @@
* test/src/create_shards.c * test/src/create_shards.c
* *
* This file contains functions to exercise shard creation functionality * This file contains functions to exercise shard creation functionality
* within CitusDB. * within Citus.
* *
* Copyright (c) 2014-2015, Citus Data, Inc. * Copyright (c) 2014-2015, Citus Data, Inc.
* *

View File

@ -3,7 +3,7 @@
* test/src/distribution_metadata.c * test/src/distribution_metadata.c
* *
* This file contains functions to exercise distributed table metadata * This file contains functions to exercise distributed table metadata
* functionality within CitusDB. * functionality within Citus.
* *
* Copyright (c) 2014-2015, Citus Data, Inc. * Copyright (c) 2014-2015, Citus Data, Inc.
* *

View File

@ -116,9 +116,9 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid,
ForeignPath *best_path, List *tlist, List *scan_clauses) ForeignPath *best_path, List *tlist, List *scan_clauses)
#else #else
static ForeignScan * static ForeignScan *
FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid, FakeGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreigntableid,
ForeignPath *best_path, List *tlist, List *scan_clauses, ForeignPath * best_path, List * tlist, List * scan_clauses,
Plan *outer_plan) Plan * outer_plan)
#endif #endif
{ {
Index scan_relid = baserel->relid; Index scan_relid = baserel->relid;
@ -129,7 +129,7 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid,
return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL); return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL);
#else #else
return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL, NIL, NIL, return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL, NIL, NIL,
outer_plan); outer_plan);
#endif #endif
} }

View File

@ -3,7 +3,7 @@
* test/src/generate_ddl_commands.c * test/src/generate_ddl_commands.c
* *
* This file contains functions to exercise DDL generation functionality * This file contains functions to exercise DDL generation functionality
* within CitusDB. * within Citus.
* *
* Copyright (c) 2014-2015, Citus Data, Inc. * Copyright (c) 2014-2015, Citus Data, Inc.
* *

View File

@ -3,7 +3,7 @@
* test/src/create_shards.c * test/src/create_shards.c
* *
* This file contains functions to exercise shard creation functionality * This file contains functions to exercise shard creation functionality
* within CitusDB. * within Citus.
* *
* Copyright (c) 2014-2015, Citus Data, Inc. * Copyright (c) 2014-2015, Citus Data, Inc.
* *

View File

@ -2,7 +2,7 @@
* *
* test/src/test_helper_functions.c * test/src/test_helper_functions.c
* *
* This file contains helper functions used in many CitusDB tests. * This file contains helper functions used in many Citus tests.
* *
* Copyright (c) 2014-2015, Citus Data, Inc. * Copyright (c) 2014-2015, Citus Data, Inc.
* *

View File

@ -16,7 +16,7 @@
/* exports for SQL callable functions */ /* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(citusdb_extradata_container); PG_FUNCTION_INFO_V1(citus_extradata_container);
/* /*
@ -189,7 +189,7 @@ ExtractRangeTblExtraData(RangeTblEntry *rte, CitusRTEKind *rteKind,
if (list_length(fauxFuncExpr->args) != 4) if (list_length(fauxFuncExpr->args) != 4)
{ {
ereport(ERROR, (errmsg("unexpected number of function arguments to " ereport(ERROR, (errmsg("unexpected number of function arguments to "
"citusdb_extradata_container"))); "citus_extradata_container")));
return; return;
} }
@ -265,7 +265,7 @@ GetRangeTblKind(RangeTblEntry *rte)
{ {
CitusRTEKind rteKind = CITUS_RTE_RELATION /* invalid */; CitusRTEKind rteKind = CITUS_RTE_RELATION /* invalid */;
switch(rte->rtekind) switch (rte->rtekind)
{ {
/* directly rtekind if it's not possibly an extended RTE */ /* directly rtekind if it's not possibly an extended RTE */
case RTE_RELATION: case RTE_RELATION:
@ -273,9 +273,13 @@ GetRangeTblKind(RangeTblEntry *rte)
case RTE_JOIN: case RTE_JOIN:
case RTE_VALUES: case RTE_VALUES:
case RTE_CTE: case RTE_CTE:
{
rteKind = (CitusRTEKind) rte->rtekind; rteKind = (CitusRTEKind) rte->rtekind;
break; break;
}
case RTE_FUNCTION: case RTE_FUNCTION:
{
/* /*
* Extract extra data - correct even if a plain RTE_FUNCTION, not * Extract extra data - correct even if a plain RTE_FUNCTION, not
* an extended one, ExtractRangeTblExtraData handles that case * an extended one, ExtractRangeTblExtraData handles that case
@ -283,6 +287,7 @@ GetRangeTblKind(RangeTblEntry *rte)
*/ */
ExtractRangeTblExtraData(rte, &rteKind, NULL, NULL, NULL); ExtractRangeTblExtraData(rte, &rteKind, NULL, NULL, NULL);
break; break;
}
} }
return rteKind; return rteKind;
@ -290,13 +295,13 @@ GetRangeTblKind(RangeTblEntry *rte)
/* /*
* citusdb_extradata_container is a placeholder function to store information * citus_extradata_container is a placeholder function to store information
* needed by CitusDB in plain postgres node trees. Executor and other hooks * needed by Citus in plain postgres node trees. Executor and other hooks
* should always intercept statements containing calls to this function. It's * should always intercept statements containing calls to this function. It's
* not actually SQL callable by the user because of an INTERNAL argument. * not actually SQL callable by the user because of an INTERNAL argument.
*/ */
Datum Datum
citusdb_extradata_container(PG_FUNCTION_ARGS) citus_extradata_container(PG_FUNCTION_ARGS)
{ {
ereport(ERROR, (errmsg("not supposed to get here, did you cheat?"))); ereport(ERROR, (errmsg("not supposed to get here, did you cheat?")));

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* citus_outfuncs.c * citus_outfuncs.c
* Output functions for CitusDB tree nodes. * Output functions for Citus tree nodes.
* *
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
@ -9,7 +9,7 @@
* *
* NOTES * NOTES
* This is a wrapper around postgres' nodeToString() that additionally * This is a wrapper around postgres' nodeToString() that additionally
* supports CitusDB node types. * supports Citus node types.
* *
* Keep as closely aligned with the upstream version as possible. * Keep as closely aligned with the upstream version as possible.
* *
@ -220,7 +220,7 @@ _outDatum(StringInfo str, Datum value, int typlen, bool typbyval)
/***************************************************************************** /*****************************************************************************
* Output routines for CitusDB node types * Output routines for Citus node types
*****************************************************************************/ *****************************************************************************/
static void static void

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* citus_readfuncs.c * citus_readfuncs.c
* CitusDB adapted reader functions for Citus & Postgres tree nodes * Citus adapted reader functions for Citus & Postgres tree nodes
* *
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* citus_readfuncs.c * citus_readfuncs.c
* CitusDB adapted reader functions for Citus & Postgres tree nodes * Citus adapted reader functions for Citus & Postgres tree nodes
* *
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California

View File

@ -102,6 +102,7 @@ pg_get_extensiondef_string(Oid tableRelationId)
static Oid static Oid
get_extension_schema(Oid ext_oid) get_extension_schema(Oid ext_oid)
{ {
/* *INDENT-OFF* */
Oid result; Oid result;
Relation rel; Relation rel;
SysScanDesc scandesc; SysScanDesc scandesc;
@ -131,6 +132,7 @@ get_extension_schema(Oid ext_oid)
heap_close(rel, AccessShareLock); heap_close(rel, AccessShareLock);
return result; return result;
/* *INDENT-ON* */
} }
@ -186,7 +188,7 @@ AppendOptionListToString(StringInfo stringBuffer, List *optionList)
foreach(optionCell, optionList) foreach(optionCell, optionList)
{ {
DefElem *option = (DefElem*) lfirst(optionCell); DefElem *option = (DefElem *) lfirst(optionCell);
char *optionName = option->defname; char *optionName = option->defname;
char *optionValue = defGetString(option); char *optionValue = defGetString(option);
@ -219,7 +221,7 @@ pg_get_tableschemadef_string(Oid tableRelationId)
char relationKind = 0; char relationKind = 0;
TupleDesc tupleDescriptor = NULL; TupleDesc tupleDescriptor = NULL;
TupleConstr *tupleConstraints = NULL; TupleConstr *tupleConstraints = NULL;
int attributeIndex = 0; int attributeIndex = 0;
bool firstAttributePrinted = false; bool firstAttributePrinted = false;
AttrNumber defaultValueIndex = 0; AttrNumber defaultValueIndex = 0;
AttrNumber constraintIndex = 0; AttrNumber constraintIndex = 0;
@ -447,21 +449,35 @@ pg_get_tablecolumnoptionsdef_string(Oid tableRelationId)
switch (attributeForm->attstorage) switch (attributeForm->attstorage)
{ {
case 'p': case 'p':
{
storageName = "PLAIN"; storageName = "PLAIN";
break; break;
}
case 'e': case 'e':
{
storageName = "EXTERNAL"; storageName = "EXTERNAL";
break; break;
}
case 'm': case 'm':
{
storageName = "MAIN"; storageName = "MAIN";
break; break;
}
case 'x': case 'x':
{
storageName = "EXTENDED"; storageName = "EXTENDED";
break; break;
}
default: default:
{
ereport(ERROR, (errmsg("unrecognized storage type: %c", ereport(ERROR, (errmsg("unrecognized storage type: %c",
attributeForm->attstorage))); attributeForm->attstorage)));
break; break;
}
} }
appendStringInfo(&statement, "ALTER COLUMN %s ", appendStringInfo(&statement, "ALTER COLUMN %s ",

View File

@ -32,7 +32,7 @@
/* /*
* NodeConnectionHash is the connection hash itself. It begins uninitialized. * NodeConnectionHash is the connection hash itself. It begins uninitialized.
* The first call to GetConnection triggers hash creation. * The first call to GetOrEstablishConnection triggers hash creation.
*/ */
static HTAB *NodeConnectionHash = NULL; static HTAB *NodeConnectionHash = NULL;
@ -44,10 +44,10 @@ static char * ConnectionGetOptionValue(PGconn *connection, char *optionKeyword);
/* /*
* GetConnection returns a PGconn which can be used to execute queries on a * GetOrEstablishConnection returns a PGconn which can be used to execute
* remote PostgreSQL server. If no suitable connection to the specified node on * queries on a remote PostgreSQL server. If no suitable connection to the
* the specified port yet exists, the function establishes a new connection and * specified node on the specified port yet exists, the function establishes
* returns that. * a new connection and adds it to the connection cache before returning it.
* *
* Returned connections are guaranteed to be in the CONNECTION_OK state. If the * Returned connections are guaranteed to be in the CONNECTION_OK state. If the
* requested connection cannot be established, or if it was previously created * requested connection cannot be established, or if it was previously created
@ -56,7 +56,7 @@ static char * ConnectionGetOptionValue(PGconn *connection, char *optionKeyword);
* This function throws an error if a hostname over 255 characters is provided. * This function throws an error if a hostname over 255 characters is provided.
*/ */
PGconn * PGconn *
GetConnection(char *nodeName, int32 nodePort) GetOrEstablishConnection(char *nodeName, int32 nodePort)
{ {
PGconn *connection = NULL; PGconn *connection = NULL;
NodeConnectionKey nodeConnectionKey; NodeConnectionKey nodeConnectionKey;
@ -249,7 +249,7 @@ CreateNodeConnectionHash(void)
info.hcxt = CacheMemoryContext; info.hcxt = CacheMemoryContext;
hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
nodeConnectionHash = hash_create("citusdb connection cache", 32, &info, hashFlags); nodeConnectionHash = hash_create("citus connection cache", 32, &info, hashFlags);
return nodeConnectionHash; return nodeConnectionHash;
} }
@ -257,7 +257,7 @@ CreateNodeConnectionHash(void)
/* /*
* ConnectToNode opens a connection to a remote PostgreSQL server. The function * ConnectToNode opens a connection to a remote PostgreSQL server. The function
* configures the connection's fallback application name to 'citusdb' and sets * configures the connection's fallback application name to 'citus' and sets
* the remote encoding to match the local one. This function requires that the * the remote encoding to match the local one. This function requires that the
* port be specified as a string for easier use with libpq functions. * port be specified as a string for easier use with libpq functions.
* *
@ -277,7 +277,7 @@ ConnectToNode(char *nodeName, char *nodePort)
"client_encoding", "connect_timeout", "dbname", NULL "client_encoding", "connect_timeout", "dbname", NULL
}; };
const char *valueArray[] = { const char *valueArray[] = {
nodeName, nodePort, "citusdb", clientEncoding, nodeName, nodePort, "citus", clientEncoding,
CLIENT_CONNECT_TIMEOUT_SECONDS, dbname, NULL CLIENT_CONNECT_TIMEOUT_SECONDS, dbname, NULL
}; };

View File

@ -51,10 +51,10 @@ static void InvalidateDistRelationCacheCallback(Datum argument, Oid relationId);
static HeapTuple LookupDistPartitionTuple(Oid relationId); static HeapTuple LookupDistPartitionTuple(Oid relationId);
static List * LookupDistShardTuples(Oid relationId); static List * LookupDistShardTuples(Oid relationId);
static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
Oid *intervalTypeId, int32 *intervalTypeMod); Oid *intervalTypeId, int32 *intervalTypeMod);
static ShardInterval * TupleToShardInterval(HeapTuple heapTuple, static ShardInterval * TupleToShardInterval(HeapTuple heapTuple,
TupleDesc tupleDescriptor, Oid intervalTypeId, TupleDesc tupleDescriptor, Oid intervalTypeId,
int32 intervalTypeMod); int32 intervalTypeMod);
static void CachedRelationLookup(const char *relationName, Oid *cachedOid); static void CachedRelationLookup(const char *relationName, Oid *cachedOid);
@ -77,7 +77,7 @@ IsDistributedTable(Oid relationId)
* yet. As we can't do lookups in nonexistent tables, directly return * yet. As we can't do lookups in nonexistent tables, directly return
* false. * false.
*/ */
if (!CitusDBHasBeenLoaded()) if (!CitusHasBeenLoaded())
{ {
return false; return false;
} }
@ -87,6 +87,7 @@ IsDistributedTable(Oid relationId)
return cacheEntry->isDistributedTable; return cacheEntry->isDistributedTable;
} }
/* /*
* LoadShardInterval reads shard metadata for given shardId from pg_dist_shard, * LoadShardInterval reads shard metadata for given shardId from pg_dist_shard,
* and converts min/max values in these metadata to their properly typed datum * and converts min/max values in these metadata to their properly typed datum
@ -98,7 +99,7 @@ LoadShardInterval(uint64 shardId)
{ {
ShardInterval *shardInterval; ShardInterval *shardInterval;
SysScanDesc scanDescriptor = NULL; SysScanDesc scanDescriptor = NULL;
ScanKeyData scanKey[1]; ScanKeyData scanKey[1];
int scanKeyCount = 1; int scanKeyCount = 1;
HeapTuple heapTuple = NULL; HeapTuple heapTuple = NULL;
Form_pg_dist_shard shardForm = NULL; Form_pg_dist_shard shardForm = NULL;
@ -127,11 +128,11 @@ LoadShardInterval(uint64 shardId)
partitionEntry = DistributedTableCacheEntry(shardForm->logicalrelid); partitionEntry = DistributedTableCacheEntry(shardForm->logicalrelid);
GetPartitionTypeInputInfo(partitionEntry->partitionKeyString, GetPartitionTypeInputInfo(partitionEntry->partitionKeyString,
partitionEntry->partitionMethod, &intervalTypeId, partitionEntry->partitionMethod, &intervalTypeId,
&intervalTypeMod); &intervalTypeMod);
shardInterval = TupleToShardInterval(heapTuple, tupleDescriptor, intervalTypeId, shardInterval = TupleToShardInterval(heapTuple, tupleDescriptor, intervalTypeId,
intervalTypeMod); intervalTypeMod);
systable_endscan(scanDescriptor); systable_endscan(scanDescriptor);
heap_close(pgDistShard, AccessShareLock); heap_close(pgDistShard, AccessShareLock);
@ -139,6 +140,7 @@ LoadShardInterval(uint64 shardId)
return shardInterval; return shardInterval;
} }
/* /*
* DistributedTableCacheEntry looks up a pg_dist_partition entry for a * DistributedTableCacheEntry looks up a pg_dist_partition entry for a
* relation. * relation.
@ -155,7 +157,7 @@ DistributedTableCacheEntry(Oid distributedRelationId)
* yet. As we can't do lookups in nonexistent tables, directly return NULL * yet. As we can't do lookups in nonexistent tables, directly return NULL
* here. * here.
*/ */
if (!CitusDBHasBeenLoaded()) if (!CitusHasBeenLoaded())
{ {
return NULL; return NULL;
} }
@ -239,19 +241,19 @@ LookupDistTableCacheEntry(Oid relationId)
int32 intervalTypeMod = -1; int32 intervalTypeMod = -1;
GetPartitionTypeInputInfo(partitionKeyString, partitionMethod, &intervalTypeId, GetPartitionTypeInputInfo(partitionKeyString, partitionMethod, &intervalTypeId,
&intervalTypeMod); &intervalTypeMod);
shardIntervalArray = MemoryContextAllocZero(CacheMemoryContext, shardIntervalArray = MemoryContextAllocZero(CacheMemoryContext,
shardIntervalArrayLength * shardIntervalArrayLength *
sizeof(ShardInterval)); sizeof(ShardInterval));
foreach(distShardTupleCell, distShardTupleList) foreach(distShardTupleCell, distShardTupleList)
{ {
HeapTuple shardTuple = lfirst(distShardTupleCell); HeapTuple shardTuple = lfirst(distShardTupleCell);
ShardInterval *shardInterval = TupleToShardInterval(shardTuple, ShardInterval *shardInterval = TupleToShardInterval(shardTuple,
distShardTupleDesc, distShardTupleDesc,
intervalTypeId, intervalTypeId,
intervalTypeMod); intervalTypeMod);
MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext); MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext);
CopyShardInterval(shardInterval, &shardIntervalArray[arrayIndex]); CopyShardInterval(shardInterval, &shardIntervalArray[arrayIndex]);
@ -292,7 +294,7 @@ LookupDistTableCacheEntry(Oid relationId)
/* /*
* CitusDBHasBeenLoaded returns true if the citusdb extension has been created * CitusHasBeenLoaded returns true if the citus extension has been created
* in the current database and the extension script has been executed. Otherwise, * in the current database and the extension script has been executed. Otherwise,
* it returns false. The result is cached as this is called very frequently. * it returns false. The result is cached as this is called very frequently.
* *
@ -301,17 +303,17 @@ LookupDistTableCacheEntry(Oid relationId)
* acceptable. * acceptable.
*/ */
bool bool
CitusDBHasBeenLoaded(void) CitusHasBeenLoaded(void)
{ {
static bool extensionLoaded = false; static bool extensionLoaded = false;
/* recheck presence until citusdb has been loaded */ /* recheck presence until citus has been loaded */
if (!extensionLoaded) if (!extensionLoaded)
{ {
bool extensionPresent = false; bool extensionPresent = false;
bool extensionScriptExecuted = true; bool extensionScriptExecuted = true;
Oid extensionOid = get_extension_oid("citusdb", true); Oid extensionOid = get_extension_oid("citus", true);
if (extensionOid != InvalidOid) if (extensionOid != InvalidOid)
{ {
extensionPresent = true; extensionPresent = true;
@ -319,7 +321,7 @@ CitusDBHasBeenLoaded(void)
if (extensionPresent) if (extensionPresent)
{ {
/* check if CitusDB extension objects are still being created */ /* check if Citus extension objects are still being created */
if (creating_extension && CurrentExtensionObject == extensionOid) if (creating_extension && CurrentExtensionObject == extensionOid)
{ {
extensionScriptExecuted = false; extensionScriptExecuted = false;
@ -428,7 +430,7 @@ CitusExtraDataContainerFuncId(void)
if (cachedOid == InvalidOid) if (cachedOid == InvalidOid)
{ {
nameList = list_make2(makeString("pg_catalog"), nameList = list_make2(makeString("pg_catalog"),
makeString("citusdb_extradata_container")); makeString("citus_extradata_container"));
cachedOid = LookupFuncName(nameList, 1, paramOids, false); cachedOid = LookupFuncName(nameList, 1, paramOids, false);
} }
@ -741,7 +743,7 @@ LookupDistShardTuples(Oid relationId)
scanKey[0].sk_argument = ObjectIdGetDatum(relationId); scanKey[0].sk_argument = ObjectIdGetDatum(relationId);
scanDescriptor = systable_beginscan(pgDistShard, DistShardLogicalRelidIndexId(), true, scanDescriptor = systable_beginscan(pgDistShard, DistShardLogicalRelidIndexId(), true,
NULL, 1, scanKey); NULL, 1, scanKey);
currentShardTuple = systable_getnext(scanDescriptor); currentShardTuple = systable_getnext(scanDescriptor);
while (HeapTupleIsValid(currentShardTuple)) while (HeapTupleIsValid(currentShardTuple))
@ -765,7 +767,7 @@ LookupDistShardTuples(Oid relationId)
*/ */
static void static void
GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
Oid *intervalTypeId, int32 *intervalTypeMod) Oid *intervalTypeId, int32 *intervalTypeMod)
{ {
*intervalTypeId = InvalidOid; *intervalTypeId = InvalidOid;
*intervalTypeMod = -1; *intervalTypeMod = -1;
@ -794,7 +796,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
{ {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("unsupported table partition type: %c", errmsg("unsupported table partition type: %c",
partitionMethod))); partitionMethod)));
} }
} }
} }
@ -806,7 +808,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
*/ */
static ShardInterval * static ShardInterval *
TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid intervalTypeId, TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid intervalTypeId,
int32 intervalTypeMod) int32 intervalTypeMod)
{ {
ShardInterval *shardInterval = NULL; ShardInterval *shardInterval = NULL;
bool isNull = false; bool isNull = false;
@ -815,16 +817,16 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva
Oid inputFunctionId = InvalidOid; Oid inputFunctionId = InvalidOid;
Oid typeIoParam = InvalidOid; Oid typeIoParam = InvalidOid;
Datum relationIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_logicalrelid, Datum relationIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_logicalrelid,
tupleDescriptor, &isNull); tupleDescriptor, &isNull);
Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid, Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid,
tupleDescriptor, &isNull); tupleDescriptor, &isNull);
Datum storageTypeDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardstorage, Datum storageTypeDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardstorage,
tupleDescriptor, &isNull); tupleDescriptor, &isNull);
Datum minValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardminvalue, Datum minValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardminvalue,
tupleDescriptor, &minValueNull); tupleDescriptor, &minValueNull);
Datum maxValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardmaxvalue, Datum maxValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardmaxvalue,
tupleDescriptor, &maxValueNull); tupleDescriptor, &maxValueNull);
Oid relationId = DatumGetObjectId(relationIdDatum); Oid relationId = DatumGetObjectId(relationIdDatum);
int64 shardId = DatumGetInt64(shardIdDatum); int64 shardId = DatumGetInt64(shardIdDatum);
@ -845,7 +847,7 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva
/* TODO: move this up the call stack to avoid per-tuple invocation? */ /* TODO: move this up the call stack to avoid per-tuple invocation? */
get_type_io_data(intervalTypeId, IOFunc_input, &intervalTypeLen, &intervalByVal, get_type_io_data(intervalTypeId, IOFunc_input, &intervalTypeLen, &intervalByVal,
&intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId); &intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId);
/* finally convert min/max values to their actual types */ /* finally convert min/max values to their actual types */
minValue = OidInputFunctionCall(inputFunctionId, minValueString, minValue = OidInputFunctionCall(inputFunctionId, minValueString,

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_resowner.c * multi_resowner.c
* CitusDB resource owner integration * Citus resource owner integration
* *
* An extension can't directly add members to ResourceOwnerData. Instead we * An extension can't directly add members to ResourceOwnerData. Instead we
* have to use the resource owner callback mechanism. Right now it's * have to use the resource owner callback mechanism. Right now it's
@ -22,7 +22,8 @@
#include "distributed/multi_resowner.h" #include "distributed/multi_resowner.h"
typedef struct JobDirectoryEntry { typedef struct JobDirectoryEntry
{
ResourceOwner owner; ResourceOwner owner;
uint64 jobId; uint64 jobId;
} JobDirectoryEntry; } JobDirectoryEntry;
@ -44,8 +45,8 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase,
bool isTopLevel, bool isTopLevel,
void *arg) void *arg)
{ {
int lastJobIndex = NumRegisteredJobDirectories - 1; int lastJobIndex = NumRegisteredJobDirectories - 1;
int jobIndex = 0; int jobIndex = 0;
if (phase == RESOURCE_RELEASE_AFTER_LOCKS) if (phase == RESOURCE_RELEASE_AFTER_LOCKS)
{ {
@ -79,7 +80,7 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase,
void void
ResourceOwnerEnlargeJobDirectories(ResourceOwner owner) ResourceOwnerEnlargeJobDirectories(ResourceOwner owner)
{ {
int newMax = 0; int newMax = 0;
/* ensure callback is registered */ /* ensure callback is registered */
if (!RegisteredResownerCallback) if (!RegisteredResownerCallback)
@ -91,15 +92,17 @@ ResourceOwnerEnlargeJobDirectories(ResourceOwner owner)
if (RegisteredJobDirectories == NULL) if (RegisteredJobDirectories == NULL)
{ {
newMax = 16; newMax = 16;
RegisteredJobDirectories = (JobDirectoryEntry *) RegisteredJobDirectories =
MemoryContextAlloc(TopMemoryContext, newMax * sizeof(JobDirectoryEntry)); (JobDirectoryEntry *) MemoryContextAlloc(TopMemoryContext,
newMax * sizeof(JobDirectoryEntry));
NumAllocatedJobDirectories = newMax; NumAllocatedJobDirectories = newMax;
} }
else if (NumRegisteredJobDirectories + 1 > NumAllocatedJobDirectories) else if (NumRegisteredJobDirectories + 1 > NumAllocatedJobDirectories)
{ {
newMax = NumAllocatedJobDirectories * 2; newMax = NumAllocatedJobDirectories * 2;
RegisteredJobDirectories = (JobDirectoryEntry *) RegisteredJobDirectories =
repalloc(RegisteredJobDirectories, newMax * sizeof(JobDirectoryEntry)); (JobDirectoryEntry *) repalloc(RegisteredJobDirectories,
newMax * sizeof(JobDirectoryEntry));
NumAllocatedJobDirectories = newMax; NumAllocatedJobDirectories = newMax;
} }
} }
@ -123,8 +126,8 @@ ResourceOwnerRememberJobDirectory(ResourceOwner owner, uint64 jobId)
void void
ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId) ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId)
{ {
int lastJobIndex = NumRegisteredJobDirectories - 1; int lastJobIndex = NumRegisteredJobDirectories - 1;
int jobIndex = 0; int jobIndex = 0;
for (jobIndex = lastJobIndex; jobIndex >= 0; jobIndex--) for (jobIndex = lastJobIndex; jobIndex >= 0; jobIndex--)
{ {
@ -135,7 +138,8 @@ ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId)
/* move all later entries one up */ /* move all later entries one up */
while (jobIndex < lastJobIndex) while (jobIndex < lastJobIndex)
{ {
RegisteredJobDirectories[jobIndex] = RegisteredJobDirectories[jobIndex + 1]; RegisteredJobDirectories[jobIndex] =
RegisteredJobDirectories[jobIndex + 1];
jobIndex++; jobIndex++;
} }
NumRegisteredJobDirectories = lastJobIndex; NumRegisteredJobDirectories = lastJobIndex;

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* resource_lock.c * resource_lock.c
* Locking Infrastructure for CitusDB. * Locking Infrastructure for Citus.
* *
* To avoid introducing a new type of locktag - that then could not be * To avoid introducing a new type of locktag - that then could not be
* displayed by core functionality - we reuse advisory locks. If we'd just * displayed by core functionality - we reuse advisory locks. If we'd just
@ -14,9 +14,10 @@
*/ */
#include "postgres.h" #include "postgres.h"
#include "c.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "distributed/relay_utility.h"
#include "distributed/resource_lock.h" #include "distributed/resource_lock.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
@ -30,7 +31,7 @@
void void
LockShardDistributionMetadata(int64 shardId, LOCKMODE lockMode) LockShardDistributionMetadata(int64 shardId, LOCKMODE lockMode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
const bool dontWait = false; const bool dontWait = false;
@ -64,10 +65,12 @@ LockRelationDistributionMetadata(Oid relationId, LOCKMODE lockMode)
void void
LockShardResource(uint64 shardId, LOCKMODE lockmode) LockShardResource(uint64 shardId, LOCKMODE lockmode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
const bool dontWait = false; const bool dontWait = false;
AssertArg(shardId != INVALID_SHARD_ID);
SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId); SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId);
(void) LockAcquire(&tag, lockmode, sessionLock, dontWait); (void) LockAcquire(&tag, lockmode, sessionLock, dontWait);
@ -78,7 +81,7 @@ LockShardResource(uint64 shardId, LOCKMODE lockmode)
void void
UnlockShardResource(uint64 shardId, LOCKMODE lockmode) UnlockShardResource(uint64 shardId, LOCKMODE lockmode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId); SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId);
@ -95,7 +98,7 @@ UnlockShardResource(uint64 shardId, LOCKMODE lockmode)
void void
LockJobResource(uint64 jobId, LOCKMODE lockmode) LockJobResource(uint64 jobId, LOCKMODE lockmode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
const bool dontWait = false; const bool dontWait = false;
@ -109,7 +112,7 @@ LockJobResource(uint64 jobId, LOCKMODE lockmode)
void void
UnlockJobResource(uint64 jobId, LOCKMODE lockmode) UnlockJobResource(uint64 jobId, LOCKMODE lockmode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
SET_LOCKTAG_JOB_RESOURCE(tag, MyDatabaseId, jobId); SET_LOCKTAG_JOB_RESOURCE(tag, MyDatabaseId, jobId);

View File

@ -50,7 +50,7 @@
#include "utils/memutils.h" #include "utils/memutils.h"
int TaskTrackerDelay = 200; /* process sleep interval in millisecs */ int TaskTrackerDelay = 200; /* process sleep interval in millisecs */
int MaxRunningTasksPerNode = 16; /* max number of running tasks */ int MaxRunningTasksPerNode = 16; /* max number of running tasks */
int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */ int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */
WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */ WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */
@ -76,10 +76,10 @@ static void TrackerCleanupJobSchemas(void);
static void TrackerCleanupConnections(HTAB *WorkerTasksHash); static void TrackerCleanupConnections(HTAB *WorkerTasksHash);
static void TrackerRegisterShutDown(HTAB *WorkerTasksHash); static void TrackerRegisterShutDown(HTAB *WorkerTasksHash);
static void TrackerDelayLoop(void); static void TrackerDelayLoop(void);
static List *SchedulableTaskList(HTAB *WorkerTasksHash); static List * SchedulableTaskList(HTAB *WorkerTasksHash);
static WorkerTask * SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash); static WorkerTask * SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash);
static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash, static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
bool (*CriteriaFunction) (WorkerTask *)); bool (*CriteriaFunction)(WorkerTask *));
static bool RunningTask(WorkerTask *workerTask); static bool RunningTask(WorkerTask *workerTask);
static bool SchedulableTask(WorkerTask *workerTask); static bool SchedulableTask(WorkerTask *workerTask);
static int CompareTasksByTime(const void *first, const void *second); static int CompareTasksByTime(const void *first, const void *second);
@ -240,7 +240,7 @@ TaskTrackerMain(Datum main_arg)
/* /*
* Reload worker membership file. For now we do that in the task * Reload worker membership file. For now we do that in the task
* tracker because that's currently the only background worker in * tracker because that's currently the only background worker in
* CitusDB. And only background workers allow us to safely * Citus. And only background workers allow us to safely
* register a SIGHUP handler. * register a SIGHUP handler.
*/ */
LoadWorkerNodeList(WorkerListFileName); LoadWorkerNodeList(WorkerListFileName);
@ -295,7 +295,7 @@ WorkerTasksHashEnter(uint64 jobId, uint32 taskId)
{ {
ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"), errmsg("out of shared memory"),
errhint("Try increasing citusdb.max_tracked_tasks_per_node."))); errhint("Try increasing citus.max_tracked_tasks_per_node.")));
} }
/* check that we do not have the same task assigned twice to this node */ /* check that we do not have the same task assigned twice to this node */
@ -393,7 +393,7 @@ TrackerCleanupJobSchemas(void)
/* /*
* We create cleanup tasks since we can't remove schemas within the task * We create cleanup tasks since we can't remove schemas within the task
* tracker process. We also assign high priorities to these tasks so * tracker process. We also assign high priorities to these tasks so
* that they get scheduled before everyone else. * that they get scheduled before everyone else.
*/ */
cleanupTask = WorkerTasksHashEnter(jobId, taskIndex); cleanupTask = WorkerTasksHashEnter(jobId, taskIndex);
cleanupTask->assignedAt = HIGH_PRIORITY_TASK_TIME; cleanupTask->assignedAt = HIGH_PRIORITY_TASK_TIME;
@ -440,7 +440,7 @@ TrackerCleanupConnections(HTAB *WorkerTasksHash)
currentTask->connectionId = INVALID_CONNECTION_ID; currentTask->connectionId = INVALID_CONNECTION_ID;
} }
currentTask = (WorkerTask *) hash_seq_search(&status); currentTask = (WorkerTask *) hash_seq_search(&status);
} }
} }
@ -494,8 +494,9 @@ TrackerDelayLoop(void)
} }
} }
/* ------------------------------------------------------------ /* ------------------------------------------------------------
* Signal handling and shared hash initialization functions follow * Signal handling and shared hash initialization functions follow
* ------------------------------------------------------------ * ------------------------------------------------------------
*/ */
@ -503,7 +504,7 @@ TrackerDelayLoop(void)
static void static void
TrackerSigHupHandler(SIGNAL_ARGS) TrackerSigHupHandler(SIGNAL_ARGS)
{ {
int save_errno = errno; int save_errno = errno;
got_SIGHUP = true; got_SIGHUP = true;
if (MyProc != NULL) if (MyProc != NULL)
@ -519,7 +520,7 @@ TrackerSigHupHandler(SIGNAL_ARGS)
static void static void
TrackerShutdownHandler(SIGNAL_ARGS) TrackerShutdownHandler(SIGNAL_ARGS)
{ {
int save_errno = errno; int save_errno = errno;
got_SIGTERM = true; got_SIGTERM = true;
if (MyProc != NULL) if (MyProc != NULL)
@ -579,10 +580,10 @@ TaskTrackerShmemInit(void)
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
/* allocate struct containing task tracker related shared state */ /* allocate struct containing task tracker related shared state */
WorkerTasksSharedState = (WorkerTasksSharedStateData *) WorkerTasksSharedState =
ShmemInitStruct("Worker Task Control", (WorkerTasksSharedStateData *) ShmemInitStruct("Worker Task Control",
sizeof(WorkerTasksSharedStateData), sizeof(WorkerTasksSharedStateData),
&alreadyInitialized); &alreadyInitialized);
if (!alreadyInitialized) if (!alreadyInitialized)
{ {
@ -607,6 +608,7 @@ TaskTrackerShmemInit(void)
} }
} }
/* ------------------------------------------------------------ /* ------------------------------------------------------------
* Task scheduling and management functions follow * Task scheduling and management functions follow
* ------------------------------------------------------------ * ------------------------------------------------------------
@ -638,7 +640,7 @@ SchedulableTaskList(HTAB *WorkerTasksHash)
schedulableTaskCount = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask); schedulableTaskCount = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask);
if (schedulableTaskCount == 0) if (schedulableTaskCount == 0)
{ {
return NIL; /* we do not have any new tasks to schedule */ return NIL; /* we do not have any new tasks to schedule */
} }
tasksToScheduleCount = MaxRunningTasksPerNode - runningTaskCount; tasksToScheduleCount = MaxRunningTasksPerNode - runningTaskCount;
@ -653,7 +655,7 @@ SchedulableTaskList(HTAB *WorkerTasksHash)
for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++) for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++)
{ {
WorkerTask *schedulableTask = (WorkerTask *) palloc0(sizeof(WorkerTask)); WorkerTask *schedulableTask = (WorkerTask *) palloc0(sizeof(WorkerTask));
schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId; schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId;
schedulableTask->taskId = schedulableTaskQueue[queueIndex].taskId; schedulableTask->taskId = schedulableTaskQueue[queueIndex].taskId;
schedulableTaskList = lappend(schedulableTaskList, schedulableTask); schedulableTaskList = lappend(schedulableTaskList, schedulableTask);
@ -681,13 +683,13 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash)
uint32 queueIndex = 0; uint32 queueIndex = 0;
/* our priority queue size equals to the number of schedulable tasks */ /* our priority queue size equals to the number of schedulable tasks */
queueSize = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask); queueSize = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask);
if (queueSize == 0) if (queueSize == 0)
{ {
return NULL; return NULL;
} }
/* allocate an array of tasks for our priority queue */ /* allocate an array of tasks for our priority queue */
priorityQueue = (WorkerTask *) palloc0(sizeof(WorkerTask) * queueSize); priorityQueue = (WorkerTask *) palloc0(sizeof(WorkerTask) * queueSize);
/* copy tasks in the shared hash to the priority queue */ /* copy tasks in the shared hash to the priority queue */
@ -719,7 +721,7 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash)
/* Counts the number of tasks that match the given criteria function. */ /* Counts the number of tasks that match the given criteria function. */
static uint32 static uint32
CountTasksMatchingCriteria(HTAB *WorkerTasksHash, CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
bool (*CriteriaFunction) (WorkerTask *)) bool (*CriteriaFunction)(WorkerTask *))
{ {
HASH_SEQ_STATUS status; HASH_SEQ_STATUS status;
WorkerTask *currentTask = NULL; WorkerTask *currentTask = NULL;
@ -730,13 +732,13 @@ CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
currentTask = (WorkerTask *) hash_seq_search(&status); currentTask = (WorkerTask *) hash_seq_search(&status);
while (currentTask != NULL) while (currentTask != NULL)
{ {
bool matchesCriteria = (*CriteriaFunction) (currentTask); bool matchesCriteria = (*CriteriaFunction)(currentTask);
if (matchesCriteria) if (matchesCriteria)
{ {
taskCount++; taskCount++;
} }
currentTask = (WorkerTask *) hash_seq_search(&status); currentTask = (WorkerTask *) hash_seq_search(&status);
} }
return taskCount; return taskCount;
@ -775,7 +777,7 @@ SchedulableTask(WorkerTask *workerTask)
static int static int
CompareTasksByTime(const void *first, const void *second) CompareTasksByTime(const void *first, const void *second)
{ {
WorkerTask *firstTask = (WorkerTask *) first; WorkerTask *firstTask = (WorkerTask *) first;
WorkerTask *secondTask = (WorkerTask *) second; WorkerTask *secondTask = (WorkerTask *) second;
/* tasks that are assigned earlier have higher priority */ /* tasks that are assigned earlier have higher priority */
@ -893,7 +895,7 @@ ManageWorkerTask(WorkerTask *workerTask, HTAB *WorkerTasksHash)
{ {
case TASK_ASSIGNED: case TASK_ASSIGNED:
{ {
break; /* nothing to do until the task gets scheduled */ break; /* nothing to do until the task gets scheduled */
} }
case TASK_SCHEDULED: case TASK_SCHEDULED:

View File

@ -57,7 +57,7 @@ task_tracker_assign_task(PG_FUNCTION_ARGS)
{ {
uint64 jobId = PG_GETARG_INT64(0); uint64 jobId = PG_GETARG_INT64(0);
uint32 taskId = PG_GETARG_UINT32(1); uint32 taskId = PG_GETARG_UINT32(1);
text *taskCallStringText = PG_GETARG_TEXT_P(2); text *taskCallStringText = PG_GETARG_TEXT_P(2);
StringInfo jobSchemaName = JobSchemaName(jobId); StringInfo jobSchemaName = JobSchemaName(jobId);
bool schemaExists = false; bool schemaExists = false;
@ -185,7 +185,7 @@ task_tracker_cleanup_job(PG_FUNCTION_ARGS)
CleanupTask(currentTask); CleanupTask(currentTask);
} }
currentTask = (WorkerTask *) hash_seq_search(&status); currentTask = (WorkerTask *) hash_seq_search(&status);
} }
LWLockRelease(WorkerTasksSharedState->taskHashLock); LWLockRelease(WorkerTasksSharedState->taskHashLock);
@ -308,7 +308,7 @@ CreateTask(uint64 jobId, uint32 taskId, char *taskCallString)
} }
/* /*
* UpdateTask updates the call string text for an already existing task. Note * UpdateTask updates the call string text for an already existing task. Note
* that this function expects the caller to hold an exclusive lock over the * that this function expects the caller to hold an exclusive lock over the
* shared hash. * shared hash.
@ -331,7 +331,7 @@ UpdateTask(WorkerTask *workerTask, char *taskCallString)
if (taskStatus == TASK_SUCCEEDED || taskStatus == TASK_CANCEL_REQUESTED || if (taskStatus == TASK_SUCCEEDED || taskStatus == TASK_CANCEL_REQUESTED ||
taskStatus == TASK_CANCELED) taskStatus == TASK_CANCELED)
{ {
; /* nothing to do */ /* nothing to do */
} }
else if (taskStatus == TASK_PERMANENTLY_FAILED) else if (taskStatus == TASK_PERMANENTLY_FAILED)
{ {

View File

@ -53,11 +53,14 @@ static void ReceiveResourceCleanup(int32 connectionId, const char *filename,
static void DeleteFile(const char *filename); static void DeleteFile(const char *filename);
static void FetchTableCommon(text *tableName, uint64 remoteTableSize, static void FetchTableCommon(text *tableName, uint64 remoteTableSize,
ArrayType *nodeNameObject, ArrayType *nodePortObject, ArrayType *nodeNameObject, ArrayType *nodePortObject,
bool (*FetchTableFunction) (const char *, uint32, StringInfo)); bool (*FetchTableFunction)(const char *, uint32,
StringInfo));
static uint64 LocalTableSize(Oid relationId); static uint64 LocalTableSize(Oid relationId);
static uint64 ExtractShardId(StringInfo tableName); static uint64 ExtractShardId(StringInfo tableName);
static bool FetchRegularTable(const char *nodeName, uint32 nodePort, StringInfo tableName); static bool FetchRegularTable(const char *nodeName, uint32 nodePort,
static bool FetchForeignTable(const char *nodeName, uint32 nodePort, StringInfo tableName); StringInfo tableName);
static bool FetchForeignTable(const char *nodeName, uint32 nodePort,
StringInfo tableName);
static List * TableDDLCommandList(const char *nodeName, uint32 nodePort, static List * TableDDLCommandList(const char *nodeName, uint32 nodePort,
StringInfo tableName); StringInfo tableName);
static StringInfo ForeignFilePath(const char *nodeName, uint32 nodePort, static StringInfo ForeignFilePath(const char *nodeName, uint32 nodePort,
@ -85,7 +88,7 @@ worker_fetch_partition_file(PG_FUNCTION_ARGS)
uint64 jobId = PG_GETARG_INT64(0); uint64 jobId = PG_GETARG_INT64(0);
uint32 partitionTaskId = PG_GETARG_UINT32(1); uint32 partitionTaskId = PG_GETARG_UINT32(1);
uint32 partitionFileId = PG_GETARG_UINT32(2); uint32 partitionFileId = PG_GETARG_UINT32(2);
uint32 upstreamTaskId = PG_GETARG_UINT32(3); uint32 upstreamTaskId = PG_GETARG_UINT32(3);
text *nodeNameText = PG_GETARG_TEXT_P(4); text *nodeNameText = PG_GETARG_TEXT_P(4);
uint32 nodePort = PG_GETARG_UINT32(5); uint32 nodePort = PG_GETARG_UINT32(5);
char *nodeName = NULL; char *nodeName = NULL;
@ -226,7 +229,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort,
char filename[MAXPGPATH]; char filename[MAXPGPATH];
int closed = -1; int closed = -1;
const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY);
const int fileMode = (S_IRUSR | S_IWUSR); const int fileMode = (S_IRUSR | S_IWUSR);
QueryStatus queryStatus = CLIENT_INVALID_QUERY; QueryStatus queryStatus = CLIENT_INVALID_QUERY;
int32 connectionId = INVALID_CONNECTION_ID; int32 connectionId = INVALID_CONNECTION_ID;
@ -309,7 +312,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort,
} }
else if (copyStatus == CLIENT_COPY_MORE) else if (copyStatus == CLIENT_COPY_MORE)
{ {
; /* remote node will continue to send more data */ /* remote node will continue to send more data */
} }
else else
{ {
@ -468,7 +471,7 @@ worker_fetch_foreign_file(PG_FUNCTION_ARGS)
static void static void
FetchTableCommon(text *tableNameText, uint64 remoteTableSize, FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
ArrayType *nodeNameObject, ArrayType *nodePortObject, ArrayType *nodeNameObject, ArrayType *nodePortObject,
bool (*FetchTableFunction) (const char *, uint32, StringInfo)) bool (*FetchTableFunction)(const char *, uint32, StringInfo))
{ {
StringInfo tableName = NULL; StringInfo tableName = NULL;
char *tableNameCString = NULL; char *tableNameCString = NULL;
@ -531,7 +534,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
if (remoteTableSize > localTableSize) if (remoteTableSize > localTableSize)
{ {
/* table is not up to date, drop the table */ /* table is not up to date, drop the table */
ObjectAddress tableObject = {InvalidOid, InvalidOid, 0}; ObjectAddress tableObject = { InvalidOid, InvalidOid, 0 };
tableObject.classId = RelationRelationId; tableObject.classId = RelationRelationId;
tableObject.objectId = relationId; tableObject.objectId = relationId;
@ -554,7 +557,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
char *nodeName = TextDatumGetCString(nodeNameDatum); char *nodeName = TextDatumGetCString(nodeNameDatum);
uint32 nodePort = DatumGetUInt32(nodePortDatum); uint32 nodePort = DatumGetUInt32(nodePortDatum);
tableFetched = (*FetchTableFunction) (nodeName, nodePort, tableName); tableFetched = (*FetchTableFunction)(nodeName, nodePort, tableName);
nodeIndex++; nodeIndex++;
} }
@ -994,11 +997,10 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
StringInfo remoteCopyCommand = NULL; StringInfo remoteCopyCommand = NULL;
CopyStmt *localCopyCommand = NULL; CopyStmt *localCopyCommand = NULL;
RangeVar *localTable = NULL; RangeVar *localTable = NULL;
uint64 copiedRowCount = 0;
uint64 shardId = INVALID_SHARD_ID; uint64 shardId = INVALID_SHARD_ID;
bool received = false; bool received = false;
char *quotedTableName = NULL; char *quotedTableName = NULL;
const char *queryString = NULL; StringInfo queryString = NULL;
const char *schemaName = NULL; const char *schemaName = NULL;
/* copy remote table's data to this node */ /* copy remote table's data to this node */
@ -1010,7 +1012,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
* the transaction for this function commits, this lock will automatically * the transaction for this function commits, this lock will automatically
* be released. This ensures appends to a shard happen in a serial manner. * be released. This ensures appends to a shard happen in a serial manner.
*/ */
shardId = ExtractShardId(shardNameString); shardId = ExtractShardId(shardNameString);
LockShardResource(shardId, AccessExclusiveLock); LockShardResource(shardId, AccessExclusiveLock);
localFilePath = makeStringInfo(); localFilePath = makeStringInfo();
@ -1032,8 +1034,13 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
localTable = makeRangeVar((char *) schemaName, shardNameString->data, -1); localTable = makeRangeVar((char *) schemaName, shardNameString->data, -1);
localCopyCommand = CopyStatement(localTable, localFilePath->data); localCopyCommand = CopyStatement(localTable, localFilePath->data);
DoCopy(localCopyCommand, queryString, &copiedRowCount); quotedTableName = quote_qualified_identifier(schemaName, shardNameString->data);
(void) copiedRowCount;
queryString = makeStringInfo();
appendStringInfo(queryString, COPY_IN_COMMAND, quotedTableName, localFilePath->data);
ProcessUtility((Node *) localCopyCommand, queryString->data,
PROCESS_UTILITY_TOPLEVEL, NULL, None_Receiver, NULL);
/* finally delete the temporary file we created */ /* finally delete the temporary file we created */
DeleteFile(localFilePath->data); DeleteFile(localFilePath->data);
@ -1049,7 +1056,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
static bool static bool
check_log_statement(List *statementList) check_log_statement(List *statementList)
{ {
ListCell *statementCell; ListCell *statementCell;
if (log_statement == LOGSTMT_NONE) if (log_statement == LOGSTMT_NONE)
{ {

View File

@ -40,22 +40,22 @@ worker_foreign_file_path(PG_FUNCTION_ARGS)
ForeignTable *foreignTable = GetForeignTable(relationId); ForeignTable *foreignTable = GetForeignTable(relationId);
ListCell *optionCell = NULL; ListCell *optionCell = NULL;
foreach(optionCell, foreignTable->options) foreach(optionCell, foreignTable->options)
{ {
DefElem *option = (DefElem *) lfirst(optionCell); DefElem *option = (DefElem *) lfirst(optionCell);
char *optionName = option->defname; char *optionName = option->defname;
int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH); int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH);
if (compareResult == 0) if (compareResult == 0)
{ {
char *optionValue = defGetString(option); char *optionValue = defGetString(option);
foreignFilePath = cstring_to_text(optionValue); foreignFilePath = cstring_to_text(optionValue);
break; break;
} }
} }
/* check that we found the filename option */ /* check that we found the filename option */
if (foreignFilePath == NULL) if (foreignFilePath == NULL)
{ {
char *relationName = get_rel_name(relationId); char *relationName = get_rel_name(relationId);
ereport(ERROR, (errmsg("could not find filename for foreign table: \"%s\"", ereport(ERROR, (errmsg("could not find filename for foreign table: \"%s\"",

View File

@ -133,7 +133,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
const char *createMergeTableQuery = text_to_cstring(createMergeTableQueryText); const char *createMergeTableQuery = text_to_cstring(createMergeTableQueryText);
const char *createIntermediateTableQuery = const char *createIntermediateTableQuery =
text_to_cstring(createIntermediateTableQueryText); text_to_cstring(createIntermediateTableQueryText);
StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId); StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId);
StringInfo jobSchemaName = JobSchemaName(jobId); StringInfo jobSchemaName = JobSchemaName(jobId);
@ -170,14 +170,14 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
if (setSearchPathResult < 0) if (setSearchPathResult < 0)
{ {
ereport(ERROR, (errmsg("execution was not successful \"%s\"", ereport(ERROR, (errmsg("execution was not successful \"%s\"",
setSearchPathString->data))); setSearchPathString->data)));
} }
createMergeTableResult = SPI_exec(createMergeTableQuery, 0); createMergeTableResult = SPI_exec(createMergeTableQuery, 0);
if (createMergeTableResult < 0) if (createMergeTableResult < 0)
{ {
ereport(ERROR, (errmsg("execution was not successful \"%s\"", ereport(ERROR, (errmsg("execution was not successful \"%s\"",
createMergeTableQuery))); createMergeTableQuery)));
} }
appendStringInfo(mergeTableName, "%s%s", intermediateTableName->data, appendStringInfo(mergeTableName, "%s%s", intermediateTableName->data,
@ -188,7 +188,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
if (createIntermediateTableResult < 0) if (createIntermediateTableResult < 0)
{ {
ereport(ERROR, (errmsg("execution was not successful \"%s\"", ereport(ERROR, (errmsg("execution was not successful \"%s\"",
createIntermediateTableQuery))); createIntermediateTableQuery)));
} }
finished = SPI_finish(); finished = SPI_finish();
@ -256,8 +256,8 @@ JobSchemaName(uint64 jobId)
*/ */
#ifdef HAVE_INTTYPES_H #ifdef HAVE_INTTYPES_H
StringInfo jobSchemaName = makeStringInfo(); StringInfo jobSchemaName = makeStringInfo();
appendStringInfo(jobSchemaName, "%s%0*"PRIu64, appendStringInfo(jobSchemaName, "%s%0*" PRIu64, JOB_SCHEMA_PREFIX,
JOB_SCHEMA_PREFIX, MIN_JOB_DIRNAME_WIDTH, jobId); MIN_JOB_DIRNAME_WIDTH, jobId);
#else #else
StringInfo jobSchemaName = makeStringInfo(); StringInfo jobSchemaName = makeStringInfo();
appendStringInfo(jobSchemaName, "%s%0*llu", appendStringInfo(jobSchemaName, "%s%0*llu",

View File

@ -59,7 +59,7 @@ static void FileOutputStreamWrite(FileOutputStream file, StringInfo dataToWrite)
static void FileOutputStreamFlush(FileOutputStream file); static void FileOutputStreamFlush(FileOutputStream file);
static void FilterAndPartitionTable(const char *filterQuery, static void FilterAndPartitionTable(const char *filterQuery,
const char *columnName, Oid columnType, const char *columnName, Oid columnType,
uint32 (*PartitionIdFunction) (Datum, const void *), uint32 (*PartitionIdFunction)(Datum, const void *),
const void *partitionIdContext, const void *partitionIdContext,
FileOutputStream *partitionFileArray, FileOutputStream *partitionFileArray,
uint32 fileCount); uint32 fileCount);
@ -105,7 +105,7 @@ worker_range_partition_table(PG_FUNCTION_ARGS)
uint32 taskId = PG_GETARG_UINT32(1); uint32 taskId = PG_GETARG_UINT32(1);
text *filterQueryText = PG_GETARG_TEXT_P(2); text *filterQueryText = PG_GETARG_TEXT_P(2);
text *partitionColumnText = PG_GETARG_TEXT_P(3); text *partitionColumnText = PG_GETARG_TEXT_P(3);
Oid partitionColumnType = PG_GETARG_OID(4); Oid partitionColumnType = PG_GETARG_OID(4);
ArrayType *splitPointObject = PG_GETARG_ARRAYTYPE_P(5); ArrayType *splitPointObject = PG_GETARG_ARRAYTYPE_P(5);
const char *filterQuery = text_to_cstring(filterQueryText); const char *filterQuery = text_to_cstring(filterQueryText);
@ -125,7 +125,7 @@ worker_range_partition_table(PG_FUNCTION_ARGS)
if (splitPointType != partitionColumnType) if (splitPointType != partitionColumnType)
{ {
ereport(ERROR, (errmsg("partition column type %u and split point type %u " ereport(ERROR, (errmsg("partition column type %u and split point type %u "
"do not match", partitionColumnType, splitPointType))); "do not match", partitionColumnType, splitPointType)));
} }
/* use column's type information to get the comparison function */ /* use column's type information to get the comparison function */
@ -181,7 +181,7 @@ worker_hash_partition_table(PG_FUNCTION_ARGS)
uint32 taskId = PG_GETARG_UINT32(1); uint32 taskId = PG_GETARG_UINT32(1);
text *filterQueryText = PG_GETARG_TEXT_P(2); text *filterQueryText = PG_GETARG_TEXT_P(2);
text *partitionColumnText = PG_GETARG_TEXT_P(3); text *partitionColumnText = PG_GETARG_TEXT_P(3);
Oid partitionColumnType = PG_GETARG_OID(4); Oid partitionColumnType = PG_GETARG_OID(4);
uint32 partitionCount = PG_GETARG_UINT32(5); uint32 partitionCount = PG_GETARG_UINT32(5);
const char *filterQuery = text_to_cstring(filterQueryText); const char *filterQuery = text_to_cstring(filterQueryText);
@ -463,7 +463,7 @@ JobDirectoryName(uint64 jobId)
*/ */
#ifdef HAVE_INTTYPES_H #ifdef HAVE_INTTYPES_H
StringInfo jobDirectoryName = makeStringInfo(); StringInfo jobDirectoryName = makeStringInfo();
appendStringInfo(jobDirectoryName, "base/%s/%s%0*"PRIu64, appendStringInfo(jobDirectoryName, "base/%s/%s%0*" PRIu64,
PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX, PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX,
MIN_JOB_DIRNAME_WIDTH, jobId); MIN_JOB_DIRNAME_WIDTH, jobId);
#else #else
@ -726,7 +726,7 @@ FileOutputStreamFlush(FileOutputStream file)
static void static void
FilterAndPartitionTable(const char *filterQuery, FilterAndPartitionTable(const char *filterQuery,
const char *partitionColumnName, Oid partitionColumnType, const char *partitionColumnName, Oid partitionColumnType,
uint32 (*PartitionIdFunction) (Datum, const void *), uint32 (*PartitionIdFunction)(Datum, const void *),
const void *partitionIdContext, const void *partitionIdContext,
FileOutputStream *partitionFileArray, FileOutputStream *partitionFileArray,
uint32 fileCount) uint32 fileCount)
@ -794,7 +794,7 @@ FilterAndPartitionTable(const char *filterQuery,
FileOutputStream partitionFile = { 0, 0, 0 }; FileOutputStream partitionFile = { 0, 0, 0 };
StringInfo rowText = NULL; StringInfo rowText = NULL;
Datum partitionKey = 0; Datum partitionKey = 0;
bool partitionKeyNull = false; bool partitionKeyNull = false;
uint32 partitionId = 0; uint32 partitionId = 0;
partitionKey = SPI_getbinval(row, rowDescriptor, partitionKey = SPI_getbinval(row, rowDescriptor,
@ -808,7 +808,7 @@ FilterAndPartitionTable(const char *filterQuery,
*/ */
if (!partitionKeyNull) if (!partitionKeyNull)
{ {
partitionId = (*PartitionIdFunction) (partitionKey, partitionIdContext); partitionId = (*PartitionIdFunction)(partitionKey, partitionIdContext);
} }
else else
{ {
@ -926,7 +926,7 @@ InitRowOutputState(void)
/* initialize defaults for printing null values */ /* initialize defaults for printing null values */
char *nullPrint = pstrdup("\\N"); char *nullPrint = pstrdup("\\N");
int nullPrintLen = strlen(nullPrint); int nullPrintLen = strlen(nullPrint);
char *nullPrintClient = pg_server_to_any(nullPrint, nullPrintLen, fileEncoding); char *nullPrintClient = pg_server_to_any(nullPrint, nullPrintLen, fileEncoding);
/* set default text output characters */ /* set default text output characters */
@ -946,7 +946,7 @@ InitRowOutputState(void)
} }
/* set up transcoding information and default text output characters */ /* set up transcoding information and default text output characters */
if ( (fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1) ) if ((fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1))
{ {
rowOutputState->need_transcoding = true; rowOutputState->need_transcoding = true;
} }
@ -1057,7 +1057,7 @@ OutputRow(HeapTuple row, TupleDesc rowDescriptor,
CopySendString(rowOutputState, rowOutputState->null_print_client); CopySendString(rowOutputState, rowOutputState->null_print_client);
} }
lastColumn = ((columnIndex+1) == columnCount); lastColumn = ((columnIndex + 1) == columnCount);
if (!lastColumn) if (!lastColumn)
{ {
CopySendChar(rowOutputState, rowOutputState->delim[0]); CopySendChar(rowOutputState, rowOutputState->delim[0]);
@ -1094,9 +1094,9 @@ OutputBinaryHeaders(FileOutputStream *partitionFileArray, uint32 fileCount)
{ {
/* Generate header for a binary copy */ /* Generate header for a binary copy */
const int32 zero = 0; const int32 zero = 0;
FileOutputStream partitionFile = {0, 0, 0}; FileOutputStream partitionFile = { 0, 0, 0 };
PartialCopyStateData headerOutputStateData; PartialCopyStateData headerOutputStateData;
PartialCopyState headerOutputState = (PartialCopyState) &headerOutputStateData; PartialCopyState headerOutputState = (PartialCopyState) & headerOutputStateData;
memset(headerOutputState, 0, sizeof(PartialCopyStateData)); memset(headerOutputState, 0, sizeof(PartialCopyStateData));
headerOutputState->fe_msgbuf = makeStringInfo(); headerOutputState->fe_msgbuf = makeStringInfo();
@ -1128,9 +1128,9 @@ OutputBinaryFooters(FileOutputStream *partitionFileArray, uint32 fileCount)
{ {
/* Generate footer for a binary copy */ /* Generate footer for a binary copy */
int16 negative = -1; int16 negative = -1;
FileOutputStream partitionFile = {0, 0, 0}; FileOutputStream partitionFile = { 0, 0, 0 };
PartialCopyStateData footerOutputStateData; PartialCopyStateData footerOutputStateData;
PartialCopyState footerOutputState = (PartialCopyState) &footerOutputStateData; PartialCopyState footerOutputState = (PartialCopyState) & footerOutputStateData;
memset(footerOutputState, 0, sizeof(PartialCopyStateData)); memset(footerOutputState, 0, sizeof(PartialCopyStateData));
footerOutputState->fe_msgbuf = makeStringInfo(); footerOutputState->fe_msgbuf = makeStringInfo();
@ -1143,6 +1143,7 @@ OutputBinaryFooters(FileOutputStream *partitionFileArray, uint32 fileCount)
} }
/* *INDENT-OFF* */
/* Append data to the copy buffer in outputState */ /* Append data to the copy buffer in outputState */
static void static void
CopySendData(PartialCopyState outputState, const void *databuf, int datasize) CopySendData(PartialCopyState outputState, const void *databuf, int datasize)
@ -1282,6 +1283,7 @@ CopyAttributeOutText(PartialCopyState cstate, char *string)
} }
/* *INDENT-ON* */
/* Helper function to send pending copy output */ /* Helper function to send pending copy output */
static inline void static inline void
CopyFlushOutput(PartialCopyState cstate, char *start, char *pointer) CopyFlushOutput(PartialCopyState cstate, char *start, char *pointer)
@ -1359,7 +1361,7 @@ RangePartitionId(Datum partitionValue, const void *context)
currentLength = currentLength - halfLength - 1; currentLength = currentLength - halfLength - 1;
} }
} }
return firstIndex; return firstIndex;
} }

View File

@ -9,12 +9,12 @@
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
citusdb_subdir = src/bin/csql citus_subdir = src/bin/csql
citusdb_top_builddir = ../../.. citus_top_builddir = ../../..
PROGRAM = csql PROGRAM = csql
PGFILEDESC = "csql - the CitusDB interactive terminal" PGFILEDESC = "csql - the Citus interactive terminal"
PGAPPICON=win32 PGAPPICON=win32
OBJS =command.o common.o help.o input.o stringutils.o mainloop.o copy.o \ OBJS =command.o common.o help.o input.o stringutils.o mainloop.o copy.o \
@ -26,7 +26,7 @@ OBJS =command.o common.o help.o input.o stringutils.o mainloop.o copy.o \
PG_LIBS = $(libpq) PG_LIBS = $(libpq)
include $(citusdb_top_builddir)/Makefile.global include $(citus_top_builddir)/Makefile.global
override CPPFLAGS += -I$(libpq_srcdir) -I$(top_srcdir)/src/bin/csql override CPPFLAGS += -I$(libpq_srcdir) -I$(top_srcdir)/src/bin/csql

View File

@ -1,5 +1,5 @@
/* /*
* csql - the CitusDB interactive terminal * csql - the Citus interactive terminal
* copy_options.c * copy_options.c
* Routines for parsing copy and stage meta commands. * Routines for parsing copy and stage meta commands.
* *
@ -16,7 +16,22 @@
#include "stringutils.h" #include "stringutils.h"
/* Concatenates "more" onto "var", and frees the original value of *var. */ /* *INDENT-OFF* */
void
free_copy_options(copy_options * ptr)
{
if (!ptr)
return;
free(ptr->before_tofrom);
free(ptr->after_tofrom);
free(ptr->file);
free(ptr->tableName);
free(ptr->columnList);
free(ptr);
}
/* concatenate "more" onto "var", freeing the original value of *var */
static void static void
xstrcat(char **var, const char *more) xstrcat(char **var, const char *more)
{ {
@ -210,21 +225,9 @@ error:
return NULL; return NULL;
} }
/* *INDENT-ON* */
/* Frees copy options. */ /* Frees copy options. */
void
free_copy_options(copy_options * ptr)
{
if (!ptr)
return;
free(ptr->before_tofrom);
free(ptr->after_tofrom);
free(ptr->file);
free(ptr->tableName);
free(ptr->columnList);
free(ptr);
}
/* /*
* ParseStageOptions takes the given copy options, parses the additional options * ParseStageOptions takes the given copy options, parses the additional options

View File

@ -1,5 +1,5 @@
/* /*
* csql - the CitusDB interactive terminal * csql - the Citus interactive terminal
* copy_options.h * copy_options.h
* Shared declarations for parsing copy and stage meta-commands. The stage * Shared declarations for parsing copy and stage meta-commands. The stage
* meta-command borrows from copy's syntax, but does not yet support * meta-command borrows from copy's syntax, but does not yet support
@ -46,7 +46,7 @@ typedef struct copy_options
bool psql_inout; /* true = use psql stdin/stdout */ bool psql_inout; /* true = use psql stdin/stdout */
bool from; /* true = FROM, false = TO */ bool from; /* true = FROM, false = TO */
char *tableName; /* table name to stage data to */ char *tableName; /* table name to stage data to */
char *columnList; /* optional column list used in staging */ char *columnList; /* optional column list used in staging */
} copy_options; } copy_options;

View File

@ -67,7 +67,7 @@ usage(unsigned short int pager)
output = PageOutput(59, pager ? &(pset.popt.topt) : NULL); output = PageOutput(59, pager ? &(pset.popt.topt) : NULL);
printf(_("csql is the CitusDB interactive terminal.\n\n")); printf(_("csql is the Citus interactive terminal.\n\n"));
fprintf(output, _("Usage:\n")); fprintf(output, _("Usage:\n"));
printf(_(" csql [OPTION]... [DBNAME [USERNAME]]\n\n")); printf(_(" csql [OPTION]... [DBNAME [USERNAME]]\n\n"));

View File

@ -200,7 +200,7 @@ MainLoop(FILE *source)
(line[4] == '\0' || line[4] == ';' || isspace((unsigned char) line[4]))) (line[4] == '\0' || line[4] == ';' || isspace((unsigned char) line[4])))
{ {
free(line); free(line);
puts(_("You are using csql, the command-line interface to CitusDB.")); puts(_("You are using csql, the command-line interface to Citus."));
printf(_("Type: \\copyright for distribution terms\n" printf(_("Type: \\copyright for distribution terms\n"
" \\h for help with SQL commands\n" " \\h for help with SQL commands\n"
" \\? for help with csql commands\n" " \\? for help with csql commands\n"

View File

@ -1,5 +1,5 @@
/* /*
* csql - the CitusDB interactive terminal * csql - the Citus interactive terminal
* stage.c * stage.c
* Helper routines to execute the csql meta-command \stage. These routines * Helper routines to execute the csql meta-command \stage. These routines
* communicate with the master and worker nodes; and create new shards and * communicate with the master and worker nodes; and create new shards and
@ -26,7 +26,8 @@
static bool FileSize(char *filename, uint64 *fileSize); static bool FileSize(char *filename, uint64 *fileSize);
static PGconn * ConnectToWorkerNode(const char *nodeName, uint32 nodePort, static PGconn * ConnectToWorkerNode(const char *nodeName, uint32 nodePort,
const char *nodeDatabase); const char *nodeDatabase);
static PGresult * ExecuteRemoteCommand(PGconn *remoteConnection, const char *remoteCommand, static PGresult * ExecuteRemoteCommand(PGconn *remoteConnection,
const char *remoteCommand,
const char **parameterValues, int parameterCount); const char **parameterValues, int parameterCount);
static TableMetadata * InitTableMetadata(const char *tableName); static TableMetadata * InitTableMetadata(const char *tableName);
static ShardMetadata * InitShardMetadata(int shardPlacementPolicy); static ShardMetadata * InitShardMetadata(int shardPlacementPolicy);
@ -41,7 +42,8 @@ static uint64 GetValueUint64(const PGresult *result, int rowNumber, int columnNu
static bool MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata); static bool MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata);
static bool MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata); static bool MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata);
static bool MasterGetNewShardId(ShardMetadata *shardMetadata); static bool MasterGetNewShardId(ShardMetadata *shardMetadata);
static bool MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy); static bool MasterGetCandidateNodes(ShardMetadata *shardMetadata,
int shardPlacementPolicy);
static bool MasterInsertShardRow(uint32 logicalRelid, char storageType, static bool MasterInsertShardRow(uint32 logicalRelid, char storageType,
const ShardMetadata *shardMetadata); const ShardMetadata *shardMetadata);
static bool MasterInsertPlacementRows(const ShardMetadata *shardMetadata); static bool MasterInsertPlacementRows(const ShardMetadata *shardMetadata);
@ -62,7 +64,8 @@ static bool ApplyShardDDLCommand(PGconn *workerNode, uint64 shardId, const char
static bool TransmitTableData(PGconn *workerNode, uint64 shardId, static bool TransmitTableData(PGconn *workerNode, uint64 shardId,
uint64 shardMaxSize, copy_options *stageOptions, uint64 shardMaxSize, copy_options *stageOptions,
uint64 currentFileOffset, uint64 *nextFileOffset); uint64 currentFileOffset, uint64 *nextFileOffset);
static bool TransmitFile(PGconn *workerNode, const char *localPath, const char *remotePath); static bool TransmitFile(PGconn *workerNode, const char *localPath,
const char *remotePath);
static bool FileStreamOK(const copy_options *stageOptions); static bool FileStreamOK(const copy_options *stageOptions);
static PQExpBuffer CreateCopyQueryString(const char *tableName, const char *columnList, static PQExpBuffer CreateCopyQueryString(const char *tableName, const char *columnList,
const char *afterToFrom); const char *afterToFrom);
@ -166,7 +169,7 @@ DoStageData(const char *stageCommand)
if (partitionMethod == DISTRIBUTE_BY_HASH) if (partitionMethod == DISTRIBUTE_BY_HASH)
{ {
psql_error("\\stage: staging data into hash partitioned tables is not " psql_error("\\stage: staging data into hash partitioned tables is not "
"supported\n"); "supported\n");
free_copy_options(stageOptions); free_copy_options(stageOptions);
FreeTableMetadata(tableMetadata); FreeTableMetadata(tableMetadata);
@ -179,7 +182,7 @@ DoStageData(const char *stageCommand)
bool tableOptionsOK = ColumnarTableOptionsOK(tableMetadata->logicalRelid); bool tableOptionsOK = ColumnarTableOptionsOK(tableMetadata->logicalRelid);
if (!tableOptionsOK) if (!tableOptionsOK)
{ {
return false; /* error message already displayed */ return false; /* error message already displayed */
} }
} }
@ -225,7 +228,7 @@ DoStageData(const char *stageCommand)
*/ */
FreeCommonStageData(stageOptions, tableMetadata, shardMetadataList); FreeCommonStageData(stageOptions, tableMetadata, shardMetadataList);
return false; /* abort immediately */ return false; /* abort immediately */
} }
/* save allocated shard metadata */ /* save allocated shard metadata */
@ -245,7 +248,7 @@ DoStageData(const char *stageCommand)
*/ */
for (nodeIndex = 0; nodeIndex < shardMetadata->nodeCount; nodeIndex++) for (nodeIndex = 0; nodeIndex < shardMetadata->nodeCount; nodeIndex++)
{ {
char *remoteNodeName = shardMetadata->nodeNameList[nodeIndex]; char *remoteNodeName = shardMetadata->nodeNameList[nodeIndex];
uint32 remoteNodePort = shardMetadata->nodePortList[nodeIndex]; uint32 remoteNodePort = shardMetadata->nodePortList[nodeIndex];
PGconn *remoteNode = NULL; PGconn *remoteNode = NULL;
@ -341,7 +344,6 @@ DoStageData(const char *stageCommand)
/* update current file offset */ /* update current file offset */
currentFileOffset = nextFileOffset; currentFileOffset = nextFileOffset;
} /* while more file data left for sharding */ } /* while more file data left for sharding */
/* /*
@ -390,9 +392,9 @@ ConnectToWorkerNode(const char *nodeName, uint32 nodePort, const char *nodeDatab
char nodePortString[MAXPGPATH]; char nodePortString[MAXPGPATH];
char connInfoString[MAXPGPATH]; char connInfoString[MAXPGPATH];
/* transcribe port number and connection info to their string values */ /* transcribe port number and connection info to their string values */
snprintf(nodePortString, MAXPGPATH, "%u", nodePort); snprintf(nodePortString, MAXPGPATH, "%u", nodePort);
snprintf(connInfoString, MAXPGPATH, CONN_INFO_TEMPLATE, snprintf(connInfoString, MAXPGPATH, CONN_INFO_TEMPLATE,
nodeDatabase, CLIENT_CONNECT_TIMEOUT); nodeDatabase, CLIENT_CONNECT_TIMEOUT);
workerNode = PQsetdb(nodeName, nodePortString, nodeOptions, nodeTty, connInfoString); workerNode = PQsetdb(nodeName, nodePortString, nodeOptions, nodeTty, connInfoString);
@ -421,16 +423,16 @@ ExecuteRemoteCommand(PGconn *remoteConnection, const char *remoteCommand,
{ {
PGresult *result = NULL; PGresult *result = NULL;
const Oid *parameterType = NULL; /* let the backend deduce type */ const Oid *parameterType = NULL; /* let the backend deduce type */
const int *parameterLength = NULL; /* text params do not need length */ const int *parameterLength = NULL; /* text params do not need length */
const int *parameterFormat = NULL; /* text params have Null by default */ const int *parameterFormat = NULL; /* text params have Null by default */
const int resultFormat = 0; /* ask for results in text format */ const int resultFormat = 0; /* ask for results in text format */
result = PQexecParams(remoteConnection, remoteCommand, result = PQexecParams(remoteConnection, remoteCommand,
parameterCount, parameterType, parameterValues, parameterCount, parameterType, parameterValues,
parameterLength, parameterFormat, resultFormat); parameterLength, parameterFormat, resultFormat);
if (PQresultStatus(result) != PGRES_COMMAND_OK && if (PQresultStatus(result) != PGRES_COMMAND_OK &&
PQresultStatus(result) != PGRES_TUPLES_OK) PQresultStatus(result) != PGRES_TUPLES_OK)
{ {
psql_error("remote command \"%s\" failed with %s", psql_error("remote command \"%s\" failed with %s",
@ -488,7 +490,7 @@ FreeTableMetadata(TableMetadata *tableMetadata)
for (eventIndex = 0; eventIndex < eventCount; eventIndex++) for (eventIndex = 0; eventIndex < eventCount; eventIndex++)
{ {
char *ddlEvent = tableMetadata->ddlEventList[eventIndex]; char *ddlEvent = tableMetadata->ddlEventList[eventIndex];
free(ddlEvent); free(ddlEvent);
ddlEvent = NULL; ddlEvent = NULL;
} }
@ -552,7 +554,7 @@ FreeShardMetadata(ShardMetadata *shardMetadata)
for (nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) for (nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
{ {
char *nodeName = shardMetadata->nodeNameList[nodeIndex]; char *nodeName = shardMetadata->nodeNameList[nodeIndex];
free(nodeName); free(nodeName);
nodeName = NULL; nodeName = NULL;
} }
@ -655,7 +657,7 @@ ExtendTablename(const char *baseTablename, uint64 shardId)
{ {
char *extendedTablename = (char *) pg_malloc0(NAMEDATALEN); char *extendedTablename = (char *) pg_malloc0(NAMEDATALEN);
snprintf(extendedTablename, NAMEDATALEN, "%s%c" UINT64_FORMAT, snprintf(extendedTablename, NAMEDATALEN, "%s%c" UINT64_FORMAT,
baseTablename, SHARD_NAME_SEPARATOR, shardId); baseTablename, SHARD_NAME_SEPARATOR, shardId);
return extendedTablename; return extendedTablename;
@ -678,7 +680,7 @@ GetValueUint64(const PGresult *result, int rowNumber, int columnNumber)
errno = 0; errno = 0;
value = strtoull(valueString, &valueStringEnd, 0); value = strtoull(valueString, &valueStringEnd, 0);
if (errno != 0 || (*valueStringEnd) != '\0') if (errno != 0 || (*valueStringEnd) != '\0')
{ {
return INVALID_UINT64; return INVALID_UINT64;
@ -716,7 +718,7 @@ MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata)
char *tableStorageType = NULL; char *tableStorageType = NULL;
char *partitionMethod = NULL; char *partitionMethod = NULL;
char *partitionKey = NULL; char *partitionKey = NULL;
int partitionKeyLength = 0; int partitionKeyLength = 0;
uint64 logicalRelid = 0; uint64 logicalRelid = 0;
uint64 shardReplicaCount = 0; uint64 shardReplicaCount = 0;
uint64 shardMaxSize = 0; uint64 shardMaxSize = 0;
@ -727,7 +729,7 @@ MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata)
parameterValue, parameterCount); parameterValue, parameterCount);
if (result == NULL) if (result == NULL)
{ {
return false; /* error message already displayed */ return false; /* error message already displayed */
} }
/* find column numbers associated with column names */ /* find column numbers associated with column names */
@ -798,13 +800,13 @@ MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata)
int ddlEventIndex = 0; int ddlEventIndex = 0;
/* fetch DDL events needed for table creation */ /* fetch DDL events needed for table creation */
result = ExecuteRemoteCommand(masterNode, remoteCommand, result = ExecuteRemoteCommand(masterNode, remoteCommand,
parameterValue, parameterCount); parameterValue, parameterCount);
if (result == NULL) if (result == NULL)
{ {
return false; return false;
} }
/* check that we have at least one DDL event */ /* check that we have at least one DDL event */
ddlEventCount = PQntuples(result); ddlEventCount = PQntuples(result);
if (ddlEventCount <= 0) if (ddlEventCount <= 0)
@ -825,7 +827,7 @@ MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata)
{ {
char *ddlEvent = NULL; char *ddlEvent = NULL;
char *ddlEventValue = PQgetvalue(result, ddlEventIndex, 0); char *ddlEventValue = PQgetvalue(result, ddlEventIndex, 0);
int ddlEventLength = PQgetlength(result, ddlEventIndex, 0); int ddlEventLength = PQgetlength(result, ddlEventIndex, 0);
if (ddlEventLength <= 0) if (ddlEventLength <= 0)
{ {
@ -866,7 +868,7 @@ MasterGetNewShardId(ShardMetadata *shardMetadata)
uint64 shardId = 0; uint64 shardId = 0;
/* fetch unique shardId for shard to be created */ /* fetch unique shardId for shard to be created */
result = ExecuteRemoteCommand(masterNode, remoteCommand, result = ExecuteRemoteCommand(masterNode, remoteCommand,
parameterValue, parameterCount); parameterValue, parameterCount);
if (result == NULL) if (result == NULL)
{ {
@ -877,7 +879,7 @@ MasterGetNewShardId(ShardMetadata *shardMetadata)
shardId = GetValueUint64(result, 0, 0); shardId = GetValueUint64(result, 0, 0);
if (shardId == INVALID_UINT64) if (shardId == INVALID_UINT64)
{ {
psql_error("remote command \"%s\" failed with invalid shardId\n", psql_error("remote command \"%s\" failed with invalid shardId\n",
remoteCommand); remoteCommand);
PQclear(result); PQclear(result);
@ -996,11 +998,11 @@ MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy)
/* walk over fetched node name/port list, and assign them to metadata */ /* walk over fetched node name/port list, and assign them to metadata */
for (nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) for (nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
{ {
char *nodeName = NULL; char *nodeName = NULL;
uint64 nodePort = 0; uint64 nodePort = 0;
char *nodeNameValue = PQgetvalue(result, nodeIndex, nodeNameIndex); char *nodeNameValue = PQgetvalue(result, nodeIndex, nodeNameIndex);
int nodeNameLength = PQgetlength(result, nodeIndex, nodeNameIndex); int nodeNameLength = PQgetlength(result, nodeIndex, nodeNameIndex);
if (nodeNameLength <= 0) if (nodeNameLength <= 0)
{ {
@ -1014,7 +1016,7 @@ MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy)
/* deep copy node name and assign to metadata */ /* deep copy node name and assign to metadata */
nodeName = (char *) pg_malloc0(nodeNameLength + 1); nodeName = (char *) pg_malloc0(nodeNameLength + 1);
strncpy(nodeName, nodeNameValue, nodeNameLength + 1); strncpy(nodeName, nodeNameValue, nodeNameLength + 1);
shardMetadata->nodeNameList[nodeIndex] = nodeName; shardMetadata->nodeNameList[nodeIndex] = nodeName;
/* convert port value string to 64-bit integer, and assign to metadata */ /* convert port value string to 64-bit integer, and assign to metadata */
@ -1024,7 +1026,7 @@ MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy)
psql_error("remote command \"%s\" failed to fetch valid port number\n", psql_error("remote command \"%s\" failed to fetch valid port number\n",
remoteCommand); remoteCommand);
PQclear(result); PQclear(result);
return false; return false;
} }
@ -1107,12 +1109,12 @@ MasterInsertPlacementRows(const ShardMetadata *shardMetadata)
bool staged = shardMetadata->nodeStageList[nodeIndex]; bool staged = shardMetadata->nodeStageList[nodeIndex];
if (staged) if (staged)
{ {
char *nodeName = shardMetadata->nodeNameList[nodeIndex]; char *nodeName = shardMetadata->nodeNameList[nodeIndex];
uint32 nodePort = shardMetadata->nodePortList[nodeIndex]; uint32 nodePort = shardMetadata->nodePortList[nodeIndex];
/* convert parameter to its string representation */ /* convert parameter to its string representation */
snprintf(nodePortString, NAMEDATALEN, "%u", nodePort); snprintf(nodePortString, NAMEDATALEN, "%u", nodePort);
parameterValue[3] = nodeName; parameterValue[3] = nodeName;
parameterValue[4] = nodePortString; parameterValue[4] = nodePortString;
@ -1136,7 +1138,7 @@ MasterInsertPlacementRows(const ShardMetadata *shardMetadata)
* staged to worker nodes. The function executes shard metadata insert commands * staged to worker nodes. The function executes shard metadata insert commands
* within a single transaction so that either all or none of the metadata are * within a single transaction so that either all or none of the metadata are
* finalized. On success, the function commits the transaction and returns true. * finalized. On success, the function commits the transaction and returns true.
* On failure, the function rolls back the transaction and returns false. * On failure, the function rolls back the transaction and returns false.
*/ */
static bool static bool
MasterInsertShardMetadata(uint32 logicalRelid, char storageType, MasterInsertShardMetadata(uint32 logicalRelid, char storageType,
@ -1199,7 +1201,7 @@ IssueTransactionCommand(PGconn *connection, const char *command)
return false; return false;
} }
PQclear(result); PQclear(result);
return true; return true;
} }
@ -1729,7 +1731,7 @@ ShardColumnarTableSize(PGconn *workerNode, const char *tablename, uint64 shardId
* failure, the function returns false. * failure, the function returns false.
*/ */
static bool static bool
ShardMinMaxValues(PGconn *workerNode, const char *tablename, ShardMinMaxValues(PGconn *workerNode, const char *tablename,
const char *partitionKey, ShardMetadata *shardMetadata) const char *partitionKey, ShardMetadata *shardMetadata)
{ {
const int MinValueIndex = 0; const int MinValueIndex = 0;
@ -1744,7 +1746,7 @@ ShardMinMaxValues(PGconn *workerNode, const char *tablename,
int maxValueLength = 0; int maxValueLength = 0;
extendedTablename = ExtendTablename(tablename, shardMetadata->shardId); extendedTablename = ExtendTablename(tablename, shardMetadata->shardId);
snprintf(remoteCommand, MAXPGPATH, SHARD_MIN_MAX_COMMAND, snprintf(remoteCommand, MAXPGPATH, SHARD_MIN_MAX_COMMAND,
partitionKey, partitionKey, extendedTablename); partitionKey, partitionKey, extendedTablename);
result = PQexec(workerNode, remoteCommand); result = PQexec(workerNode, remoteCommand);

View File

@ -1,5 +1,5 @@
/* /*
* csql - the CitusDB interactive terminal * csql - the Citus interactive terminal
* stage.h * stage.h
* Declarations for the csql meta-command \stage. These declarations define a * Declarations for the csql meta-command \stage. These declarations define a
* protocol for the client to communicate to the master and worker nodes. * protocol for the client to communicate to the master and worker nodes.
@ -30,42 +30,44 @@
#define ROLLBACK_COMMAND "ROLLBACK" #define ROLLBACK_COMMAND "ROLLBACK"
/* Names of remote function calls to execute on the master. */ /* Names of remote function calls to execute on the master. */
#define MASTER_GET_TABLE_METADATA "SELECT * FROM master_get_table_metadata($1::text)" #define MASTER_GET_TABLE_METADATA "SELECT * FROM master_get_table_metadata($1::text)"
#define MASTER_GET_TABLE_DDL_EVENTS "SELECT * FROM master_get_table_ddl_events($1::text)" #define MASTER_GET_TABLE_DDL_EVENTS "SELECT * FROM master_get_table_ddl_events($1::text)"
#define MASTER_GET_NEW_SHARDID "SELECT * FROM master_get_new_shardid()" #define MASTER_GET_NEW_SHARDID "SELECT * FROM master_get_new_shardid()"
#define MASTER_GET_LOCAL_FIRST_CANDIDATE_NODES "SELECT * FROM \ #define MASTER_GET_LOCAL_FIRST_CANDIDATE_NODES \
master_get_local_first_candidate_nodes()" "SELECT * FROM master_get_local_first_candidate_nodes()"
#define MASTER_GET_ROUND_ROBIN_CANDIDATE_NODES "SELECT * FROM \ #define MASTER_GET_ROUND_ROBIN_CANDIDATE_NODES \
master_get_round_robin_candidate_nodes($1::int8)" "SELECT * FROM master_get_round_robin_candidate_nodes($1::int8)"
#define MASTER_INSERT_SHARD_ROW "INSERT INTO pg_dist_shard \ #define MASTER_INSERT_SHARD_ROW \
(logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES \ "INSERT INTO pg_dist_shard " \
($1::oid, $2::int8, $3::char, $4::text, $5::text)" "(logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES " \
#define MASTER_INSERT_PLACEMENT_ROW "INSERT INTO pg_dist_shard_placement \ "($1::oid, $2::int8, $3::char, $4::text, $5::text)"
(shardid, shardstate, shardlength, nodename, nodeport) VALUES \ #define MASTER_INSERT_PLACEMENT_ROW \
($1::int8, $2::int4, $3::int8, $4::text, $5::int4)" "INSERT INTO pg_dist_shard_placement " \
"(shardid, shardstate, shardlength, nodename, nodeport) VALUES " \
"($1::int8, $2::int4, $3::int8, $4::text, $5::int4)"
/* Column names used to identify response fields as returned from the master. */ /* Column names used to identify response fields as returned from the master. */
#define LOGICAL_RELID_FIELD "logical_relid" #define LOGICAL_RELID_FIELD "logical_relid"
#define PART_STORAGE_TYPE_FIELD "part_storage_type" #define PART_STORAGE_TYPE_FIELD "part_storage_type"
#define PART_METHOD_FIELD "part_method" #define PART_METHOD_FIELD "part_method"
#define PART_KEY_FIELD "part_key" #define PART_KEY_FIELD "part_key"
#define PART_REPLICA_COUNT_FIELD "part_replica_count" #define PART_REPLICA_COUNT_FIELD "part_replica_count"
#define PART_MAX_SIZE_FIELD "part_max_size" #define PART_MAX_SIZE_FIELD "part_max_size"
#define PART_PLACEMENT_POLICY_FIELD "part_placement_policy" #define PART_PLACEMENT_POLICY_FIELD "part_placement_policy"
#define NODE_NAME_FIELD "node_name" #define NODE_NAME_FIELD "node_name"
#define NODE_PORT_FIELD "node_port" #define NODE_PORT_FIELD "node_port"
/* the tablename in the overloaded COPY statement is the to-be-transferred file */ /* the tablename in the overloaded COPY statement is the to-be-transferred file */
#define TRANSMIT_REGULAR_COMMAND "COPY \"%s\" FROM STDIN WITH (format 'transmit')" #define TRANSMIT_REGULAR_COMMAND "COPY \"%s\" FROM STDIN WITH (format 'transmit')"
#define SHARD_MIN_MAX_COMMAND "SELECT min(%s), max(%s) FROM %s" #define SHARD_MIN_MAX_COMMAND "SELECT min(%s), max(%s) FROM %s"
#define SHARD_TABLE_SIZE_COMMAND "SELECT pg_table_size('%s')" #define SHARD_TABLE_SIZE_COMMAND "SELECT pg_table_size('%s')"
#define SET_FOREIGN_TABLE_FILENAME "ALTER FOREIGN TABLE %s OPTIONS (SET filename '%s')" #define SET_FOREIGN_TABLE_FILENAME "ALTER FOREIGN TABLE %s OPTIONS (SET filename '%s')"
#define GET_COLUMNAR_TABLE_FILENAME_OPTION "SELECT * FROM \ #define GET_COLUMNAR_TABLE_FILENAME_OPTION \
(SELECT (pg_options_to_table(ftoptions)).* FROM pg_foreign_table \ "SELECT * FROM (SELECT (pg_options_to_table(ftoptions)).* FROM pg_foreign_table " \
WHERE ftrelid = %u) AS Q WHERE option_name = 'filename';" "WHERE ftrelid = %u) AS Q WHERE option_name = 'filename';"
#define APPLY_SHARD_DDL_COMMAND "SELECT * FROM worker_apply_shard_ddl_command \ #define APPLY_SHARD_DDL_COMMAND \
($1::int8, $2::text)" "SELECT * FROM worker_apply_shard_ddl_command ($1::int8, $2::text)"
#define REMOTE_FILE_SIZE_COMMAND "SELECT size FROM pg_stat_file('%s')" #define REMOTE_FILE_SIZE_COMMAND "SELECT size FROM pg_stat_file('%s')"
#define SHARD_COLUMNAR_TABLE_SIZE_COMMAND "SELECT cstore_table_size('%s')" #define SHARD_COLUMNAR_TABLE_SIZE_COMMAND "SELECT cstore_table_size('%s')"
@ -90,17 +92,16 @@
*/ */
typedef struct TableMetadata typedef struct TableMetadata
{ {
uint32 logicalRelid; /* table's relationId on the master */ uint32 logicalRelid; /* table's relationId on the master */
char tableStorageType; /* relay file, foreign table, or table */ char tableStorageType; /* relay file, foreign table, or table */
char partitionMethod; /* table's partition method */ char partitionMethod; /* table's partition method */
char *partitionKey; /* partition key expression */ char *partitionKey; /* partition key expression */
uint32 shardReplicaCount; /* shard replication factor */ uint32 shardReplicaCount; /* shard replication factor */
uint64 shardMaxSize; /* create new shard when shard reaches max size */ uint64 shardMaxSize; /* create new shard when shard reaches max size */
uint32 shardPlacementPolicy; /* policy to use when choosing nodes to place shards */ uint32 shardPlacementPolicy; /* policy to use when choosing nodes to place shards */
char **ddlEventList; /* DDL statements used for creating new shard */ char **ddlEventList; /* DDL statements used for creating new shard */
uint32 ddlEventCount; /* DDL statement count; statement list size */ uint32 ddlEventCount; /* DDL statement count; statement list size */
} TableMetadata; } TableMetadata;
@ -112,17 +113,16 @@ typedef struct TableMetadata
*/ */
typedef struct ShardMetadata typedef struct ShardMetadata
{ {
uint64 shardId; /* global shardId; created on the master node */ uint64 shardId; /* global shardId; created on the master node */
char **nodeNameList; /* candidate node name list for shard uploading */ char **nodeNameList; /* candidate node name list for shard uploading */
uint32 *nodePortList; /* candidate node port list for shard uploading */ uint32 *nodePortList; /* candidate node port list for shard uploading */
uint32 nodeCount; /* candidate node count; node list size */ uint32 nodeCount; /* candidate node count; node list size */
bool *nodeStageList; /* shard uploaded to corresponding candidate node? */ bool *nodeStageList; /* shard uploaded to corresponding candidate node? */
char *shardMinValue; /* partition key's minimum value in shard */ char *shardMinValue; /* partition key's minimum value in shard */
char *shardMaxValue; /* partition key's maximum value in shard */ char *shardMaxValue; /* partition key's maximum value in shard */
uint64 shardSize; /* shard size; updated during staging */ uint64 shardSize; /* shard size; updated during staging */
} ShardMetadata; } ShardMetadata;

View File

@ -1,4 +1,4 @@
/stamp-h /stamp-h
/stamp-ext-h /stamp-ext-h
/citusdb_config.h /citus_config.h
/citusdb_config.h.in~ /citus_config.h.in~

View File

@ -1,9 +1,9 @@
/* src/include/citusdb_config.h.in. Generated from configure.in by autoheader. */ /* src/include/citus_config.h.in. Generated from configure.in by autoheader. */
/* /*
* citusdb_config.h.in is generated by autoconf/autoheader and * citus_config.h.in is generated by autoconf/autoheader and
* converted into citusdb_config.h by configure. Include when code needs to * converted into citus_config.h by configure. Include when code needs to
* depend on determinations made by configure. * depend on determinations made by configure.
* *
* Do not manually edit! * Do not manually edit!

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* citus_nodefuncs.h * citus_nodefuncs.h
* Node (de-)serialization support for CitusDB. * Node (de-)serialization support for Citus.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
* *

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* citus_nodes.h * citus_nodes.h
* Additional node types, and related infrastructure, for CitusDB. * Additional node types, and related infrastructure, for Citus.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
* *
@ -52,7 +52,7 @@ typedef enum CitusNodeTag
#define CitusIsA(nodeptr,_type_) (CitusNodeTag(nodeptr) == T_##_type_) #define CitusIsA(nodeptr,_type_) (CitusNodeTag(nodeptr) == T_##_type_)
/* CitusDB variant of newNode(), don't use directly. */ /* Citus variant of newNode(), don't use directly. */
#define CitusNewNode(size, tag) \ #define CitusNewNode(size, tag) \
({ Node *_result; \ ({ Node *_result; \
AssertMacro((size) >= sizeof(Node)); /* need the tag, at least */ \ AssertMacro((size) >= sizeof(Node)); /* need the tag, at least */ \
@ -63,7 +63,7 @@ typedef enum CitusNodeTag
/* /*
* CitusMakeNode is CitusDB variant of makeNode(). Use it to create nodes of * CitusMakeNode is Citus variant of makeNode(). Use it to create nodes of
* the types listed in the CitusNodeTag enum and plain NodeTag. Initializes * the types listed in the CitusNodeTag enum and plain NodeTag. Initializes
* memory, besides the node tag, to 0. * memory, besides the node tag, to 0.
*/ */

View File

@ -1,8 +1,8 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* citus_ruleutils.h * citus_ruleutils.h
* CitusDB ruleutils wrapper functions and exported PostgreSQL ruleutils * Citus ruleutils wrapper functions and exported PostgreSQL ruleutils
* functions. * functions.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -15,17 +15,18 @@
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
/* Function declarations for version independent CitusDB ruleutils wrapper functions */ /* Function declarations for version independent Citus ruleutils wrapper functions */
extern char *pg_get_extensiondef_string(Oid tableRelationId); extern char * pg_get_extensiondef_string(Oid tableRelationId);
extern char *pg_get_serverdef_string(Oid tableRelationId); extern char * pg_get_serverdef_string(Oid tableRelationId);
extern char *pg_get_tableschemadef_string(Oid tableRelationId); extern char * pg_get_tableschemadef_string(Oid tableRelationId);
extern char *pg_get_tablecolumnoptionsdef_string(Oid tableRelationId); extern char * pg_get_tablecolumnoptionsdef_string(Oid tableRelationId);
extern char *pg_get_indexclusterdef_string(Oid indexRelationId); extern char * pg_get_indexclusterdef_string(Oid indexRelationId);
/* Function declarations for version dependent PostgreSQL ruleutils functions */ /* Function declarations for version dependent PostgreSQL ruleutils functions */
extern void pg_get_query_def(Query *query, StringInfo buffer); extern void pg_get_query_def(Query *query, StringInfo buffer);
extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo buffer); extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo
extern char *generate_relation_name(Oid relid, List *namespaces); buffer);
extern char * generate_relation_name(Oid relid, List *namespaces);
#endif /* CITUS_RULEUTILS_H */ #endif /* CITUS_RULEUTILS_H */

View File

@ -51,7 +51,7 @@ typedef struct NodeConnectionEntry
/* function declarations for obtaining and using a connection */ /* function declarations for obtaining and using a connection */
extern PGconn * GetConnection(char *nodeName, int32 nodePort); extern PGconn * GetOrEstablishConnection(char *nodeName, int32 nodePort);
extern void PurgeConnection(PGconn *connection); extern void PurgeConnection(PGconn *connection);
extern void ReportRemoteError(PGconn *connection, PGresult *result); extern void ReportRemoteError(PGconn *connection, PGresult *result);

View File

@ -9,8 +9,8 @@
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef CITUSDB_LISTUTILS_H #ifndef CITUS_LISTUTILS_H
#define CITUSDB_LISTUTILS_H #define CITUS_LISTUTILS_H
#include "postgres.h" #include "postgres.h"
#include "c.h" #include "c.h"
@ -23,4 +23,4 @@ extern List * SortList(List *pointerList,
int (*ComparisonFunction)(const void *, const void *)); int (*ComparisonFunction)(const void *, const void *));
#endif /* CITUSDB_LISTUTILS_H */ #endif /* CITUS_LISTUTILS_H */

View File

@ -30,15 +30,14 @@ typedef struct ShardInterval
CitusNodeTag type; CitusNodeTag type;
Oid relationId; Oid relationId;
char storageType; char storageType;
Oid valueTypeId; /* min/max value datum's typeId */ Oid valueTypeId; /* min/max value datum's typeId */
int valueTypeLen; /* min/max value datum's typelen */ int valueTypeLen; /* min/max value datum's typelen */
bool valueByVal; /* min/max value datum's byval */ bool valueByVal; /* min/max value datum's byval */
bool minValueExists; bool minValueExists;
bool maxValueExists; bool maxValueExists;
Datum minValue; /* a shard's typed min value datum */ Datum minValue; /* a shard's typed min value datum */
Datum maxValue; /* a shard's typed max value datum */ Datum maxValue; /* a shard's typed max value datum */
uint64 shardId; uint64 shardId;
} ShardInterval; } ShardInterval;
@ -46,13 +45,12 @@ typedef struct ShardInterval
typedef struct ShardPlacement typedef struct ShardPlacement
{ {
CitusNodeTag type; CitusNodeTag type;
Oid tupleOid; /* unique oid that implies this row's insertion order */ Oid tupleOid; /* unique oid that implies this row's insertion order */
uint64 shardId; uint64 shardId;
uint64 shardLength; uint64 shardLength;
RelayFileState shardState; RelayFileState shardState;
char *nodeName; char *nodeName;
uint32 nodePort; uint32 nodePort;
} ShardPlacement; } ShardPlacement;

View File

@ -49,13 +49,14 @@
#define SHARDID_SEQUENCE_NAME "pg_dist_shardid_seq" #define SHARDID_SEQUENCE_NAME "pg_dist_shardid_seq"
/* Remote call definitions to help with data staging and deletion */ /* Remote call definitions to help with data staging and deletion */
#define WORKER_APPLY_SHARD_DDL_COMMAND "SELECT worker_apply_shard_ddl_command \ #define WORKER_APPLY_SHARD_DDL_COMMAND \
("UINT64_FORMAT", %s)" "SELECT worker_apply_shard_ddl_command (" UINT64_FORMAT ", %s)"
#define WORKER_APPEND_TABLE_TO_SHARD "SELECT worker_append_table_to_shard \ #define WORKER_APPEND_TABLE_TO_SHARD \
(%s, %s, %s, %u)" "SELECT worker_append_table_to_shard (%s, %s, %s, %u)"
#define SHARD_MIN_VALUE_QUERY "SELECT min(%s) FROM %s" #define SHARD_MIN_VALUE_QUERY "SELECT min(%s) FROM %s"
#define SHARD_MAX_VALUE_QUERY "SELECT max(%s) FROM %s" #define SHARD_MAX_VALUE_QUERY "SELECT max(%s) FROM %s"
#define SHARD_TABLE_SIZE_QUERY "SELECT pg_table_size('%s')" #define SHARD_TABLE_SIZE_QUERY "SELECT pg_table_size('%s')"
#define SHARD_CSTORE_TABLE_SIZE_QUERY "SELECT cstore_table_size('%s')"
#define DROP_REGULAR_TABLE_COMMAND "DROP TABLE IF EXISTS %s" #define DROP_REGULAR_TABLE_COMMAND "DROP TABLE IF EXISTS %s"
#define DROP_FOREIGN_TABLE_COMMAND "DROP FOREIGN TABLE IF EXISTS %s" #define DROP_FOREIGN_TABLE_COMMAND "DROP FOREIGN TABLE IF EXISTS %s"
#define CREATE_SCHEMA_COMMAND "CREATE SCHEMA IF NOT EXISTS %s" #define CREATE_SCHEMA_COMMAND "CREATE SCHEMA IF NOT EXISTS %s"
@ -67,7 +68,6 @@ typedef enum
SHARD_PLACEMENT_INVALID_FIRST = 0, SHARD_PLACEMENT_INVALID_FIRST = 0,
SHARD_PLACEMENT_LOCAL_NODE_FIRST = 1, SHARD_PLACEMENT_LOCAL_NODE_FIRST = 1,
SHARD_PLACEMENT_ROUND_ROBIN = 2 SHARD_PLACEMENT_ROUND_ROBIN = 2
} ShardPlacementPolicyType; } ShardPlacementPolicyType;
@ -83,8 +83,8 @@ extern Oid ResolveRelationId(text *relationName);
extern List * GetTableDDLEvents(Oid relationId); extern List * GetTableDDLEvents(Oid relationId);
extern void CheckDistributedTable(Oid relationId); extern void CheckDistributedTable(Oid relationId);
extern void CreateShardPlacements(int64 shardId, List *ddlEventList, extern void CreateShardPlacements(int64 shardId, List *ddlEventList,
List *workerNodeList, int workerStartIndex, List *workerNodeList, int workerStartIndex,
int replicationFactor); int replicationFactor);
/* Function declarations for generating metadata for shard creation */ /* Function declarations for generating metadata for shard creation */
extern Datum master_get_table_metadata(PG_FUNCTION_ARGS); extern Datum master_get_table_metadata(PG_FUNCTION_ARGS);

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* metadata_cache.h * metadata_cache.h
* Executor support for CitusDB. * Executor support for Citus.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
* *
@ -47,7 +47,7 @@ extern ShardInterval * LoadShardInterval(uint64 shardId);
extern DistTableCacheEntry * DistributedTableCacheEntry(Oid distributedRelationId); extern DistTableCacheEntry * DistributedTableCacheEntry(Oid distributedRelationId);
extern void CitusInvalidateRelcacheByRelid(Oid relationId); extern void CitusInvalidateRelcacheByRelid(Oid relationId);
extern bool CitusDBHasBeenLoaded(void); extern bool CitusHasBeenLoaded(void);
/* relation oids */ /* relation oids */
extern Oid DistPartitionRelationId(void); extern Oid DistPartitionRelationId(void);

View File

@ -24,6 +24,7 @@
#define INVALID_TASK_ID 0 #define INVALID_TASK_ID 0
#if (PG_VERSION_NUM >= 90500) #if (PG_VERSION_NUM >= 90500)
/* reserved alias name for UPSERTs */ /* reserved alias name for UPSERTs */
#define UPSERT_ALIAS "citus_table_alias" #define UPSERT_ALIAS "citus_table_alias"
#endif #endif

View File

@ -15,21 +15,20 @@
#define MULTI_CLIENT_EXECUTOR_H #define MULTI_CLIENT_EXECUTOR_H
#define INVALID_CONNECTION_ID -1 /* identifies an invalid connection */ #define INVALID_CONNECTION_ID -1 /* identifies an invalid connection */
#define CLIENT_CONNECT_TIMEOUT 5 /* connection timeout in seconds */ #define CLIENT_CONNECT_TIMEOUT 5 /* connection timeout in seconds */
#define MAX_CONNECTION_COUNT 2048 /* simultaneous client connection count */ #define MAX_CONNECTION_COUNT 2048 /* simultaneous client connection count */
#define STRING_BUFFER_SIZE 1024 /* buffer size for character arrays */ #define STRING_BUFFER_SIZE 1024 /* buffer size for character arrays */
#define CONN_INFO_TEMPLATE "host=%s port=%u dbname=%s connect_timeout=%u" #define CONN_INFO_TEMPLATE "host=%s port=%u dbname=%s connect_timeout=%u"
/* Enumeration to track one client connection's status */ /* Enumeration to track one client connection's status */
typedef enum typedef enum
{ {
CLIENT_INVALID_CONNECT = 0, CLIENT_INVALID_CONNECT = 0,
CLIENT_CONNECTION_BAD = 1, CLIENT_CONNECTION_BAD = 1,
CLIENT_CONNECTION_BUSY = 2, CLIENT_CONNECTION_BUSY = 2,
CLIENT_CONNECTION_READY = 3 CLIENT_CONNECTION_READY = 3
} ConnectStatus; } ConnectStatus;
@ -38,9 +37,8 @@ typedef enum
{ {
CLIENT_INVALID_RESULT_STATUS = 0, CLIENT_INVALID_RESULT_STATUS = 0,
CLIENT_RESULT_UNAVAILABLE = 1, CLIENT_RESULT_UNAVAILABLE = 1,
CLIENT_RESULT_BUSY = 2, CLIENT_RESULT_BUSY = 2,
CLIENT_RESULT_READY = 3 CLIENT_RESULT_READY = 3
} ResultStatus; } ResultStatus;
@ -48,10 +46,9 @@ typedef enum
typedef enum typedef enum
{ {
CLIENT_INVALID_QUERY = 0, CLIENT_INVALID_QUERY = 0,
CLIENT_QUERY_FAILED = 1, CLIENT_QUERY_FAILED = 1,
CLIENT_QUERY_DONE = 2, CLIENT_QUERY_DONE = 2,
CLIENT_QUERY_COPY = 3 CLIENT_QUERY_COPY = 3
} QueryStatus; } QueryStatus;
@ -59,21 +56,19 @@ typedef enum
typedef enum typedef enum
{ {
CLIENT_INVALID_COPY = 0, CLIENT_INVALID_COPY = 0,
CLIENT_COPY_MORE = 1, CLIENT_COPY_MORE = 1,
CLIENT_COPY_FAILED = 2, CLIENT_COPY_FAILED = 2,
CLIENT_COPY_DONE = 3 CLIENT_COPY_DONE = 3
} CopyStatus; } CopyStatus;
/* Enumeration to track the status of a query in a batch on the client */ /* Enumeration to track the status of a query in a batch on the client */
typedef enum typedef enum
{ {
CLIENT_INVALID_BATCH_QUERY = 0, CLIENT_INVALID_BATCH_QUERY = 0,
CLIENT_BATCH_QUERY_FAILED = 1, CLIENT_BATCH_QUERY_FAILED = 1,
CLIENT_BATCH_QUERY_CONTINUE = 2, CLIENT_BATCH_QUERY_CONTINUE = 2,
CLIENT_BATCH_QUERY_DONE = 3 CLIENT_BATCH_QUERY_DONE = 3
} BatchQueryStatus; } BatchQueryStatus;

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_executor.h * multi_executor.h
* Executor support for CitusDB. * Executor support for Citus.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -14,12 +14,12 @@
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
/* signal currently executed statement is a master select statement or router execution */ /* signal currently executed statement is a master select statement or router execution */
#define EXEC_FLAG_CITUS_MASTER_SELECT 0x100 #define EXEC_FLAG_CITUS_MASTER_SELECT 0x100
#define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200 #define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200
extern void multi_ExecutorStart(QueryDesc *queryDesc, int eflags); extern void multi_ExecutorStart(QueryDesc *queryDesc, int eflags);
extern void multi_ExecutorRun(QueryDesc *queryDesc, extern void multi_ExecutorRun(QueryDesc *queryDesc,
ScanDirection direction, long count); ScanDirection direction, long count);
extern void multi_ExecutorFinish(QueryDesc *queryDesc); extern void multi_ExecutorFinish(QueryDesc *queryDesc);
extern void multi_ExecutorEnd(QueryDesc *queryDesc); extern void multi_ExecutorEnd(QueryDesc *queryDesc);

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_explain.h * multi_explain.h
* Explain support for CitusDB. * Explain support for Citus.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------

View File

@ -29,7 +29,7 @@ typedef enum JoinRuleType
{ {
JOIN_RULE_INVALID_FIRST = 0, JOIN_RULE_INVALID_FIRST = 0,
BROADCAST_JOIN = 1, BROADCAST_JOIN = 1,
LOCAL_PARTITION_JOIN = 2, LOCAL_PARTITION_JOIN = 2,
SINGLE_PARTITION_JOIN = 3, SINGLE_PARTITION_JOIN = 3,
DUAL_PARTITION_JOIN = 4, DUAL_PARTITION_JOIN = 4,
CARTESIAN_PRODUCT = 5, CARTESIAN_PRODUCT = 5,
@ -40,7 +40,6 @@ typedef enum JoinRuleType
* RuleNameArray. * RuleNameArray.
*/ */
JOIN_RULE_LAST JOIN_RULE_LAST
} JoinRuleType; } JoinRuleType;
@ -53,7 +52,6 @@ typedef struct TableEntry
{ {
Oid relationId; Oid relationId;
uint32 rangeTableId; uint32 rangeTableId;
} TableEntry; } TableEntry;
@ -65,14 +63,13 @@ typedef struct TableEntry
*/ */
typedef struct JoinOrderNode typedef struct JoinOrderNode
{ {
TableEntry *tableEntry; /* this node's relation and range table id */ TableEntry *tableEntry; /* this node's relation and range table id */
JoinRuleType joinRuleType; /* not relevant for the first table */ JoinRuleType joinRuleType; /* not relevant for the first table */
JoinType joinType; /* not relevant for the first table */ JoinType joinType; /* not relevant for the first table */
Var *partitionColumn; /* not relevant for the first table */ Var *partitionColumn; /* not relevant for the first table */
char partitionMethod; char partitionMethod;
List *joinClauseList; /* not relevant for the first table */ List *joinClauseList; /* not relevant for the first table */
List *shardIntervalList; List *shardIntervalList;
} JoinOrderNode; } JoinOrderNode;

View File

@ -44,7 +44,7 @@
* *
* Please note that the order of values in this enumeration is tied to the order * Please note that the order of values in this enumeration is tied to the order
* of elements in the following AggregateNames array. This order needs to be * of elements in the following AggregateNames array. This order needs to be
* preserved. * preserved.
*/ */
typedef enum typedef enum
{ {
@ -55,7 +55,6 @@ typedef enum
AGGREGATE_SUM = 4, AGGREGATE_SUM = 4,
AGGREGATE_COUNT = 5, AGGREGATE_COUNT = 5,
AGGREGATE_ARRAY_AGG = 6 AGGREGATE_ARRAY_AGG = 6
} AggregateType; } AggregateType;
@ -69,7 +68,6 @@ typedef enum
PUSH_DOWN_VALID = 1, PUSH_DOWN_VALID = 1,
PUSH_DOWN_NOT_VALID = 2, PUSH_DOWN_NOT_VALID = 2,
PUSH_DOWN_SPECIAL_CONDITIONS = 3 PUSH_DOWN_SPECIAL_CONDITIONS = 3
} PushDownStatus; } PushDownStatus;
@ -82,7 +80,6 @@ typedef enum
PULL_UP_INVALID_FIRST = 0, PULL_UP_INVALID_FIRST = 0,
PULL_UP_VALID = 1, PULL_UP_VALID = 1,
PULL_UP_NOT_VALID = 2 PULL_UP_NOT_VALID = 2
} PullUpStatus; } PullUpStatus;
@ -97,8 +94,10 @@ typedef enum
* Please note that the order of elements in this array is tied to the order of * Please note that the order of elements in this array is tied to the order of
* values in the preceding AggregateType enum. This order needs to be preserved. * values in the preceding AggregateType enum. This order needs to be preserved.
*/ */
static const char * const AggregateNames[] = { "invalid", "avg", "min", "max", static const char *const AggregateNames[] = {
"sum", "count", "array_agg" }; "invalid", "avg", "min", "max", "sum",
"count", "array_agg"
};
/* Config variable managed via guc.c */ /* Config variable managed via guc.c */

View File

@ -40,8 +40,8 @@ typedef struct MultiNode
CitusNodeTag type; CitusNodeTag type;
struct MultiNode *parentNode; struct MultiNode *parentNode;
/* child node(s) are defined in unary and binary nodes */
/* child node(s) are defined in unary and binary nodes */
} MultiNode; } MultiNode;
@ -51,7 +51,6 @@ typedef struct MultiUnaryNode
MultiNode node; MultiNode node;
struct MultiNode *childNode; struct MultiNode *childNode;
} MultiUnaryNode; } MultiUnaryNode;
@ -62,7 +61,6 @@ typedef struct MultiBinaryNode
struct MultiNode *leftChildNode; struct MultiNode *leftChildNode;
struct MultiNode *rightChildNode; struct MultiNode *rightChildNode;
} MultiBinaryNode; } MultiBinaryNode;
@ -73,7 +71,6 @@ typedef struct MultiBinaryNode
typedef struct MultiTreeRoot typedef struct MultiTreeRoot
{ {
MultiUnaryNode unaryNode; MultiUnaryNode unaryNode;
} MultiTreeRoot; } MultiTreeRoot;
@ -91,7 +88,6 @@ typedef struct MultiTable
Alias *alias; Alias *alias;
Alias *referenceNames; Alias *referenceNames;
Query *subquery; /* this field is only valid for non-relation subquery types */ Query *subquery; /* this field is only valid for non-relation subquery types */
} MultiTable; } MultiTable;
@ -100,7 +96,6 @@ typedef struct MultiProject
{ {
MultiUnaryNode unaryNode; MultiUnaryNode unaryNode;
List *columnList; List *columnList;
} MultiProject; } MultiProject;
@ -112,7 +107,6 @@ typedef struct MultiProject
typedef struct MultiCollect typedef struct MultiCollect
{ {
MultiUnaryNode unaryNode; MultiUnaryNode unaryNode;
} MultiCollect; } MultiCollect;
@ -125,7 +119,6 @@ typedef struct MultiSelect
{ {
MultiUnaryNode unaryNode; MultiUnaryNode unaryNode;
List *selectClauseList; List *selectClauseList;
} MultiSelect; } MultiSelect;
@ -140,7 +133,6 @@ typedef struct MultiJoin
List *joinClauseList; List *joinClauseList;
JoinRuleType joinRuleType; JoinRuleType joinRuleType;
JoinType joinType; JoinType joinType;
} MultiJoin; } MultiJoin;
@ -150,7 +142,6 @@ typedef struct MultiPartition
MultiUnaryNode unaryNode; MultiUnaryNode unaryNode;
Var *partitionColumn; Var *partitionColumn;
uint32 splitPointTableId; uint32 splitPointTableId;
} MultiPartition; } MultiPartition;
@ -158,7 +149,6 @@ typedef struct MultiPartition
typedef struct MultiCartesianProduct typedef struct MultiCartesianProduct
{ {
MultiBinaryNode binaryNode; MultiBinaryNode binaryNode;
} MultiCartesianProduct; } MultiCartesianProduct;
@ -183,7 +173,6 @@ typedef struct MultiExtendedOp
List *sortClauseList; List *sortClauseList;
Node *limitCount; Node *limitCount;
Node *limitOffset; Node *limitOffset;
} MultiExtendedOp; } MultiExtendedOp;

View File

@ -2,7 +2,7 @@
* *
* multi_physical_planner.h * multi_physical_planner.h
* Type and function declarations used in creating the distributed execution * Type and function declarations used in creating the distributed execution
* plan. * plan.
* *
* Copyright (c) 2012, Citus Data, Inc. * Copyright (c) 2012, Citus Data, Inc.
* *
@ -40,17 +40,18 @@
(" UINT64_FORMAT ", %d, %s, '%s', %d, %d)" (" UINT64_FORMAT ", %d, %s, '%s', %d, %d)"
#define MERGE_FILES_INTO_TABLE_COMMAND "SELECT worker_merge_files_into_table \ #define MERGE_FILES_INTO_TABLE_COMMAND "SELECT worker_merge_files_into_table \
(" UINT64_FORMAT ", %d, '%s', '%s')" (" UINT64_FORMAT ", %d, '%s', '%s')"
#define MERGE_FILES_AND_RUN_QUERY_COMMAND "SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')" #define MERGE_FILES_AND_RUN_QUERY_COMMAND \
"SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')"
typedef enum CitusRTEKind typedef enum CitusRTEKind
{ {
CITUS_RTE_RELATION = RTE_RELATION, /* ordinary relation reference */ CITUS_RTE_RELATION = RTE_RELATION, /* ordinary relation reference */
CITUS_RTE_SUBQUERY = RTE_SUBQUERY, /* subquery in FROM */ CITUS_RTE_SUBQUERY = RTE_SUBQUERY, /* subquery in FROM */
CITUS_RTE_JOIN = RTE_JOIN, /* join */ CITUS_RTE_JOIN = RTE_JOIN, /* join */
CITUS_RTE_FUNCTION = RTE_FUNCTION, /* function in FROM */ CITUS_RTE_FUNCTION = RTE_FUNCTION, /* function in FROM */
CITUS_RTE_VALUES = RTE_VALUES, /* VALUES (<exprlist>), (<exprlist>), ... */ CITUS_RTE_VALUES = RTE_VALUES, /* VALUES (<exprlist>), (<exprlist>), ... */
CITUS_RTE_CTE = RTE_CTE, /* common table expr (WITH list element) */ CITUS_RTE_CTE = RTE_CTE, /* common table expr (WITH list element) */
CITUS_RTE_SHARD, CITUS_RTE_SHARD,
CITUS_RTE_REMOTE_QUERY CITUS_RTE_REMOTE_QUERY
} CitusRTEKind; } CitusRTEKind;
@ -61,8 +62,7 @@ typedef enum
{ {
PARTITION_INVALID_FIRST = 0, PARTITION_INVALID_FIRST = 0,
RANGE_PARTITION_TYPE = 1, RANGE_PARTITION_TYPE = 1,
HASH_PARTITION_TYPE = 2 HASH_PARTITION_TYPE = 2
} PartitionType; } PartitionType;
@ -77,7 +77,6 @@ typedef enum
MAP_OUTPUT_FETCH_TASK = 5, MAP_OUTPUT_FETCH_TASK = 5,
MERGE_FETCH_TASK = 6, MERGE_FETCH_TASK = 6,
MODIFY_TASK = 7 MODIFY_TASK = 7
} TaskType; } TaskType;
@ -88,7 +87,6 @@ typedef enum
TASK_ASSIGNMENT_GREEDY = 1, TASK_ASSIGNMENT_GREEDY = 1,
TASK_ASSIGNMENT_ROUND_ROBIN = 2, TASK_ASSIGNMENT_ROUND_ROBIN = 2,
TASK_ASSIGNMENT_FIRST_REPLICA = 3 TASK_ASSIGNMENT_FIRST_REPLICA = 3
} TaskAssignmentPolicyType; } TaskAssignmentPolicyType;
@ -99,7 +97,6 @@ typedef enum
JOIN_MAP_MERGE_JOB = 1, JOIN_MAP_MERGE_JOB = 1,
SUBQUERY_MAP_MERGE_JOB = 2, SUBQUERY_MAP_MERGE_JOB = 2,
TOP_LEVEL_WORKER_JOB = 3 TOP_LEVEL_WORKER_JOB = 3
} BoundaryNodeJobType; } BoundaryNodeJobType;
@ -133,7 +130,6 @@ typedef struct MapMergeJob
ShardInterval **sortedShardIntervalArray; /* only applies to range partitioning */ ShardInterval **sortedShardIntervalArray; /* only applies to range partitioning */
List *mapTaskList; List *mapTaskList;
List *mergeTaskList; List *mergeTaskList;
} MapMergeJob; } MapMergeJob;
@ -153,18 +149,17 @@ typedef struct Task
uint64 jobId; uint64 jobId;
uint32 taskId; uint32 taskId;
char *queryString; char *queryString;
uint64 anchorShardId; /* only applies to compute tasks */ uint64 anchorShardId; /* only applies to compute tasks */
List *taskPlacementList; /* only applies to compute tasks */ List *taskPlacementList; /* only applies to compute tasks */
List *dependedTaskList; /* only applies to compute tasks */ List *dependedTaskList; /* only applies to compute tasks */
uint32 partitionId; uint32 partitionId;
uint32 upstreamTaskId; /* only applies to data fetch tasks */ uint32 upstreamTaskId; /* only applies to data fetch tasks */
ShardInterval *shardInterval; /* only applies to merge tasks */ ShardInterval *shardInterval; /* only applies to merge tasks */
bool assignmentConstrained; /* only applies to merge tasks */ bool assignmentConstrained; /* only applies to merge tasks */
uint64 shardId; /* only applies to shard fetch tasks */ uint64 shardId; /* only applies to shard fetch tasks */
TaskExecution *taskExecution; /* used by task tracker executor */ TaskExecution *taskExecution; /* used by task tracker executor */
bool upsertQuery; /* only applies to modify tasks */ bool upsertQuery; /* only applies to modify tasks */
} Task; } Task;
@ -177,7 +172,6 @@ typedef struct RangeTableFragment
CitusRTEKind fragmentType; CitusRTEKind fragmentType;
void *fragmentReference; void *fragmentReference;
uint32 rangeTableId; uint32 rangeTableId;
} RangeTableFragment; } RangeTableFragment;
@ -190,7 +184,6 @@ typedef struct JoinSequenceNode
{ {
uint32 rangeTableId; uint32 rangeTableId;
int32 joiningRangeTableId; int32 joiningRangeTableId;
} JoinSequenceNode; } JoinSequenceNode;
@ -203,7 +196,6 @@ typedef struct MultiPlan
Job *workerJob; Job *workerJob;
Query *masterQuery; Query *masterQuery;
char *masterTableName; char *masterTableName;
} MultiPlan; } MultiPlan;

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_planner.h * multi_planner.h
* General CitusDB planner code. * General Citus planner code.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -13,8 +13,8 @@
#include "nodes/plannodes.h" #include "nodes/plannodes.h"
#include "nodes/relation.h" #include "nodes/relation.h"
extern PlannedStmt *multi_planner(Query *parse, int cursorOptions, extern PlannedStmt * multi_planner(Query *parse, int cursorOptions,
ParamListInfo boundParams); ParamListInfo boundParams);
extern bool HasCitusToplevelNode(PlannedStmt *planStatement); extern bool HasCitusToplevelNode(PlannedStmt *planStatement);
struct MultiPlan; struct MultiPlan;

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_resowner.h * multi_resowner.h
* CitusDB resource owner integration. * Citus resource owner integration.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------

View File

@ -20,9 +20,9 @@
#define MAX_TASK_EXECUTION_FAILURES 3 /* allowed failure count for one task */ #define MAX_TASK_EXECUTION_FAILURES 3 /* allowed failure count for one task */
#define MAX_TRACKER_FAILURE_COUNT 3 /* allowed failure count for one tracker */ #define MAX_TRACKER_FAILURE_COUNT 3 /* allowed failure count for one tracker */
#define REMOTE_NODE_CONNECT_TIMEOUT 4000 /* async connect timeout in ms */ #define REMOTE_NODE_CONNECT_TIMEOUT 4000 /* async connect timeout in ms */
#define RESERVED_FD_COUNT 64 /* file descriptors unavailable to executor */ #define RESERVED_FD_COUNT 64 /* file descriptors unavailable to executor */
/* copy out query results */ /* copy out query results */
#define COPY_QUERY_TO_STDOUT_TEXT "COPY (%s) TO STDOUT" #define COPY_QUERY_TO_STDOUT_TEXT "COPY (%s) TO STDOUT"
@ -32,9 +32,9 @@
/* Task tracker executor related defines */ /* Task tracker executor related defines */
#define TASK_ASSIGNMENT_QUERY "SELECT task_tracker_assign_task \ #define TASK_ASSIGNMENT_QUERY "SELECT task_tracker_assign_task \
("UINT64_FORMAT", %u, %s)" ("UINT64_FORMAT ", %u, %s)"
#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT", %u)" #define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT ", %u)"
#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT")" #define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT ")"
#define JOB_CLEANUP_TASK_ID INT_MAX #define JOB_CLEANUP_TASK_ID INT_MAX
@ -43,9 +43,9 @@ typedef enum
{ {
EXEC_TASK_INVALID_FIRST = 0, EXEC_TASK_INVALID_FIRST = 0,
EXEC_TASK_CONNECT_START = 1, EXEC_TASK_CONNECT_START = 1,
EXEC_TASK_CONNECT_POLL = 2, EXEC_TASK_CONNECT_POLL = 2,
EXEC_TASK_FAILED = 3, EXEC_TASK_FAILED = 3,
EXEC_FETCH_TASK_LOOP = 4, EXEC_FETCH_TASK_LOOP = 4,
EXEC_FETCH_TASK_START = 5, EXEC_FETCH_TASK_START = 5,
EXEC_FETCH_TASK_RUNNING = 6, EXEC_FETCH_TASK_RUNNING = 6,
EXEC_COMPUTE_TASK_START = 7, EXEC_COMPUTE_TASK_START = 7,
@ -60,7 +60,6 @@ typedef enum
EXEC_TASK_TRACKER_FAILED = 14, EXEC_TASK_TRACKER_FAILED = 14,
EXEC_SOURCE_TASK_TRACKER_RETRY = 15, EXEC_SOURCE_TASK_TRACKER_RETRY = 15,
EXEC_SOURCE_TASK_TRACKER_FAILED = 16 EXEC_SOURCE_TASK_TRACKER_FAILED = 16
} TaskExecStatus; } TaskExecStatus;
@ -74,7 +73,6 @@ typedef enum
EXEC_TRANSMIT_TRACKER_RETRY = 4, EXEC_TRANSMIT_TRACKER_RETRY = 4,
EXEC_TRANSMIT_TRACKER_FAILED = 5, EXEC_TRANSMIT_TRACKER_FAILED = 5,
EXEC_TRANSMIT_DONE = 6 EXEC_TRANSMIT_DONE = 6
} TransmitExecStatus; } TransmitExecStatus;
@ -86,7 +84,6 @@ typedef enum
TRACKER_CONNECT_POLL = 2, TRACKER_CONNECT_POLL = 2,
TRACKER_CONNECTED = 3, TRACKER_CONNECTED = 3,
TRACKER_CONNECTION_FAILED = 4 TRACKER_CONNECTION_FAILED = 4
} TrackerStatus; } TrackerStatus;
@ -97,7 +94,6 @@ typedef enum
MULTI_EXECUTOR_REAL_TIME = 1, MULTI_EXECUTOR_REAL_TIME = 1,
MULTI_EXECUTOR_TASK_TRACKER = 2, MULTI_EXECUTOR_TASK_TRACKER = 2,
MULTI_EXECUTOR_ROUTER = 3 MULTI_EXECUTOR_ROUTER = 3
} MultiExecutorType; } MultiExecutorType;
@ -107,7 +103,6 @@ typedef enum
CONNECT_ACTION_NONE = 0, CONNECT_ACTION_NONE = 0,
CONNECT_ACTION_OPENED = 1, CONNECT_ACTION_OPENED = 1,
CONNECT_ACTION_CLOSED = 2 CONNECT_ACTION_CLOSED = 2
} ConnectAction; } ConnectAction;
@ -132,7 +127,6 @@ struct TaskExecution
uint32 querySourceNodeIndex; /* only applies to map fetch tasks */ uint32 querySourceNodeIndex; /* only applies to map fetch tasks */
int32 dataFetchTaskIndex; int32 dataFetchTaskIndex;
uint32 failureCount; uint32 failureCount;
}; };
@ -147,7 +141,6 @@ typedef struct TrackerTaskState
uint32 taskId; uint32 taskId;
TaskStatus status; TaskStatus status;
StringInfo taskAssignmentQuery; StringInfo taskAssignmentQuery;
} TrackerTaskState; } TrackerTaskState;
@ -158,7 +151,7 @@ typedef struct TrackerTaskState
*/ */
typedef struct TaskTracker typedef struct TaskTracker
{ {
uint32 workerPort; /* node's port; part of hash table key */ uint32 workerPort; /* node's port; part of hash table key */
char workerName[WORKER_LENGTH]; /* node's name; part of hash table key */ char workerName[WORKER_LENGTH]; /* node's name; part of hash table key */
TrackerStatus trackerStatus; TrackerStatus trackerStatus;
int32 connectionId; int32 connectionId;
@ -171,7 +164,6 @@ typedef struct TaskTracker
int32 currentTaskIndex; int32 currentTaskIndex;
bool connectionBusy; bool connectionBusy;
TrackerTaskState *connectionBusyOnTask; TrackerTaskState *connectionBusyOnTask;
} TaskTracker; } TaskTracker;
@ -184,7 +176,6 @@ typedef struct WorkerNodeState
uint32 workerPort; uint32 workerPort;
char workerName[WORKER_LENGTH]; char workerName[WORKER_LENGTH];
uint32 openConnectionCount; uint32 openConnectionCount;
} WorkerNodeState; } WorkerNodeState;

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* multi_utility.h * multi_utility.h
* CitusDB utility hook and related functionality. * Citus utility hook and related functionality.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------

View File

@ -21,9 +21,9 @@
*/ */
typedef struct FormData_pg_dist_partition typedef struct FormData_pg_dist_partition
{ {
Oid logicalrelid; /* logical relation id; references pg_class oid */ Oid logicalrelid; /* logical relation id; references pg_class oid */
char partmethod; /* partition method; see codes below */ char partmethod; /* partition method; see codes below */
text partkey; /* partition key expression */ text partkey; /* partition key expression */
} FormData_pg_dist_partition; } FormData_pg_dist_partition;
/* ---------------- /* ----------------
@ -37,16 +37,16 @@ typedef FormData_pg_dist_partition *Form_pg_dist_partition;
* compiler constants for pg_dist_partitions * compiler constants for pg_dist_partitions
* ---------------- * ----------------
*/ */
#define Natts_pg_dist_partition 3 #define Natts_pg_dist_partition 3
#define Anum_pg_dist_partition_logicalrelid 1 #define Anum_pg_dist_partition_logicalrelid 1
#define Anum_pg_dist_partition_partmethod 2 #define Anum_pg_dist_partition_partmethod 2
#define Anum_pg_dist_partition_partkey 3 #define Anum_pg_dist_partition_partkey 3
/* valid values for partmethod include append, hash, and range */ /* valid values for partmethod include append, hash, and range */
#define DISTRIBUTE_BY_APPEND 'a' #define DISTRIBUTE_BY_APPEND 'a'
#define DISTRIBUTE_BY_HASH 'h' #define DISTRIBUTE_BY_HASH 'h'
#define DISTRIBUTE_BY_RANGE 'r' #define DISTRIBUTE_BY_RANGE 'r'
#define REDISTRIBUTE_BY_HASH 'x' #define REDISTRIBUTE_BY_HASH 'x'
#endif /* PG_DIST_PARTITION_H */ #endif /* PG_DIST_PARTITION_H */

View File

@ -22,13 +22,13 @@
*/ */
typedef struct FormData_pg_dist_shard typedef struct FormData_pg_dist_shard
{ {
Oid logicalrelid; /* logical relation id; references pg_class oid */ Oid logicalrelid; /* logical relation id; references pg_class oid */
int64 shardid; /* global shardId representing remote partition */ int64 shardid; /* global shardId representing remote partition */
char shardstorage; /* shard storage type; see codes below */ char shardstorage; /* shard storage type; see codes below */
#ifdef CATALOG_VARLEN /* variable-length fields start here */ #ifdef CATALOG_VARLEN /* variable-length fields start here */
text shardalias; /* user specified table name for shard, if any */ text shardalias; /* user specified table name for shard, if any */
text shardminvalue; /* partition key's minimum value in shard */ text shardminvalue; /* partition key's minimum value in shard */
text shardmaxvalue; /* partition key's maximum value in shard */ text shardmaxvalue; /* partition key's maximum value in shard */
#endif #endif
} FormData_pg_dist_shard; } FormData_pg_dist_shard;
@ -43,22 +43,22 @@ typedef FormData_pg_dist_shard *Form_pg_dist_shard;
* compiler constants for pg_dist_shards * compiler constants for pg_dist_shards
* ---------------- * ----------------
*/ */
#define Natts_pg_dist_shard 6 #define Natts_pg_dist_shard 6
#define Anum_pg_dist_shard_logicalrelid 1 #define Anum_pg_dist_shard_logicalrelid 1
#define Anum_pg_dist_shard_shardid 2 #define Anum_pg_dist_shard_shardid 2
#define Anum_pg_dist_shard_shardstorage 3 #define Anum_pg_dist_shard_shardstorage 3
#define Anum_pg_dist_shard_shardalias 4 #define Anum_pg_dist_shard_shardalias 4
#define Anum_pg_dist_shard_shardminvalue 5 #define Anum_pg_dist_shard_shardminvalue 5
#define Anum_pg_dist_shard_shardmaxvalue 6 #define Anum_pg_dist_shard_shardmaxvalue 6
/* /*
* Valid values for shard storage types include relay file, foreign table, * Valid values for shard storage types include relay file, foreign table,
* (standard) table and columnar table. Relay file types are currently unused. * (standard) table and columnar table. Relay file types are currently unused.
*/ */
#define SHARD_STORAGE_RELAY 'r' #define SHARD_STORAGE_RELAY 'r'
#define SHARD_STORAGE_FOREIGN 'f' #define SHARD_STORAGE_FOREIGN 'f'
#define SHARD_STORAGE_TABLE 't' #define SHARD_STORAGE_TABLE 't'
#define SHARD_STORAGE_COLUMNAR 'c' #define SHARD_STORAGE_COLUMNAR 'c'
#endif /* PG_DIST_SHARD_H */ #endif /* PG_DIST_SHARD_H */

View File

@ -23,12 +23,12 @@
*/ */
typedef struct FormData_pg_dist_shard_placement typedef struct FormData_pg_dist_shard_placement
{ {
int64 shardid; /* global shardId on remote node */ int64 shardid; /* global shardId on remote node */
int32 shardstate; /* shard state on remote node; see RelayFileState */ int32 shardstate; /* shard state on remote node; see RelayFileState */
int64 shardlength; /* shard length on remote node; stored as bigint */ int64 shardlength; /* shard length on remote node; stored as bigint */
#ifdef CATALOG_VARLEN /* variable-length fields start here */ #ifdef CATALOG_VARLEN /* variable-length fields start here */
text nodename; /* remote node's host name */ text nodename; /* remote node's host name */
int32 nodeport; /* remote node's port number */ int32 nodeport; /* remote node's port number */
#endif #endif
} FormData_pg_dist_shard_placement; } FormData_pg_dist_shard_placement;
@ -43,12 +43,12 @@ typedef FormData_pg_dist_shard_placement *Form_pg_dist_shard_placement;
* compiler constants for pg_dist_shard_placement * compiler constants for pg_dist_shard_placement
* ---------------- * ----------------
*/ */
#define Natts_pg_dist_shard_placement 5 #define Natts_pg_dist_shard_placement 5
#define Anum_pg_dist_shard_placement_shardid 1 #define Anum_pg_dist_shard_placement_shardid 1
#define Anum_pg_dist_shard_placement_shardstate 2 #define Anum_pg_dist_shard_placement_shardstate 2
#define Anum_pg_dist_shard_placement_shardlength 3 #define Anum_pg_dist_shard_placement_shardlength 3
#define Anum_pg_dist_shard_placement_nodename 4 #define Anum_pg_dist_shard_placement_nodename 4
#define Anum_pg_dist_shard_placement_nodeport 5 #define Anum_pg_dist_shard_placement_nodeport 5
#endif /* PG_DIST_SHARD_PLACEMENT_H */ #endif /* PG_DIST_SHARD_PLACEMENT_H */

View File

@ -3,7 +3,7 @@
* relay_utility.h * relay_utility.h
* *
* Header and type declarations that extend relation, index and constraint names * Header and type declarations that extend relation, index and constraint names
* with the appropriate shard identifiers. * with the appropriate shard identifiers.
* *
* Copyright (c) 2012, Citus Data, Inc. * Copyright (c) 2012, Citus Data, Inc.
* *
@ -36,7 +36,6 @@ typedef enum
FILE_CACHED = 2, FILE_CACHED = 2,
FILE_INACTIVE = 3, FILE_INACTIVE = 3,
FILE_TO_DELETE = 4 FILE_TO_DELETE = 4
} RelayFileState; } RelayFileState;

View File

@ -1,7 +1,7 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* resource_lock.h * resource_lock.h
* Locking Infrastructure for CitusDB. * Locking Infrastructure for Citus.
* *
* Copyright (c) 2012-2015, Citus Data, Inc. * Copyright (c) 2012-2015, Citus Data, Inc.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -21,7 +21,7 @@
* advisory locks. Only 1 and 2 are used allowing us to define non-conflicting * advisory locks. Only 1 and 2 are used allowing us to define non-conflicting
* lock methods. * lock methods.
* *
* In case postgres starts to use additional values, CitusDB's values * In case postgres starts to use additional values, Citus's values
* will have to be changed. That just requires re-compiling and a restart. * will have to be changed. That just requires re-compiling and a restart.
*/ */
typedef enum AdvisoryLocktagClass typedef enum AdvisoryLocktagClass
@ -29,7 +29,8 @@ typedef enum AdvisoryLocktagClass
/* values defined in postgres' lockfuncs.c */ /* values defined in postgres' lockfuncs.c */
ADV_LOCKTAG_CLASS_INT64 = 1, ADV_LOCKTAG_CLASS_INT64 = 1,
ADV_LOCKTAG_CLASS_INT32 = 2, ADV_LOCKTAG_CLASS_INT32 = 2,
/* CitusDB lock types */
/* Citus lock types */
ADV_LOCKTAG_CLASS_CITUS_SHARD_METADATA = 4, ADV_LOCKTAG_CLASS_CITUS_SHARD_METADATA = 4,
ADV_LOCKTAG_CLASS_CITUS_SHARD = 5, ADV_LOCKTAG_CLASS_CITUS_SHARD = 5,
ADV_LOCKTAG_CLASS_CITUS_JOB = 6 ADV_LOCKTAG_CLASS_CITUS_JOB = 6

View File

@ -19,10 +19,10 @@
#include "utils/hsearch.h" #include "utils/hsearch.h"
#define HIGH_PRIORITY_TASK_TIME 1 /* assignment time for high priority tasks */ #define HIGH_PRIORITY_TASK_TIME 1 /* assignment time for high priority tasks */
#define RESERVED_JOB_ID 1 /* reserved for cleanup and shutdown tasks */ #define RESERVED_JOB_ID 1 /* reserved for cleanup and shutdown tasks */
#define SHUTDOWN_MARKER_TASK_ID UINT_MAX /* used to identify task tracker shutdown */ #define SHUTDOWN_MARKER_TASK_ID UINT_MAX /* used to identify task tracker shutdown */
#define MAX_TASK_FAILURE_COUNT 2 /* allowed failure count for one task */ #define MAX_TASK_FAILURE_COUNT 2 /* allowed failure count for one task */
#define LOCAL_HOST_NAME "localhost" /* connect to local backends using this name */ #define LOCAL_HOST_NAME "localhost" /* connect to local backends using this name */
#define TASK_CALL_STRING_SIZE 12288 /* max length of task call string */ #define TASK_CALL_STRING_SIZE 12288 /* max length of task call string */
#define TEMPLATE0_NAME "template0" /* skip job schema cleanup for template0 */ #define TEMPLATE0_NAME "template0" /* skip job schema cleanup for template0 */
@ -37,13 +37,13 @@
typedef enum typedef enum
{ {
TASK_STATUS_INVALID_FIRST = 0, TASK_STATUS_INVALID_FIRST = 0,
TASK_ASSIGNED = 1, /* master node and task tracker */ TASK_ASSIGNED = 1, /* master node and task tracker */
TASK_SCHEDULED = 2, TASK_SCHEDULED = 2,
TASK_RUNNING = 3, TASK_RUNNING = 3,
TASK_FAILED = 4, TASK_FAILED = 4,
TASK_PERMANENTLY_FAILED = 5, TASK_PERMANENTLY_FAILED = 5,
TASK_SUCCEEDED = 6, TASK_SUCCEEDED = 6,
TASK_CANCEL_REQUESTED = 7, /* master node only */ TASK_CANCEL_REQUESTED = 7, /* master node only */
TASK_CANCELED = 8, TASK_CANCELED = 8,
TASK_TO_REMOVE = 9, TASK_TO_REMOVE = 9,
@ -63,7 +63,6 @@ typedef enum
* TASK_STATUS_LAST, should never have their numbers changed. * TASK_STATUS_LAST, should never have their numbers changed.
*/ */
TASK_STATUS_LAST TASK_STATUS_LAST
} TaskStatus; } TaskStatus;
@ -76,16 +75,15 @@ typedef enum
*/ */
typedef struct WorkerTask typedef struct WorkerTask
{ {
uint64 jobId; /* job id (upper 32-bits reserved); part of hash table key */ uint64 jobId; /* job id (upper 32-bits reserved); part of hash table key */
uint32 taskId; /* task id; part of hash table key */ uint32 taskId; /* task id; part of hash table key */
uint32 assignedAt; /* task assignment time in epoch seconds */ uint32 assignedAt; /* task assignment time in epoch seconds */
char taskCallString[TASK_CALL_STRING_SIZE]; /* query or function call string */ char taskCallString[TASK_CALL_STRING_SIZE]; /* query or function call string */
TaskStatus taskStatus; /* task's current execution status */ TaskStatus taskStatus; /* task's current execution status */
char databaseName[NAMEDATALEN]; /* name to use for local backend connection */ char databaseName[NAMEDATALEN]; /* name to use for local backend connection */
int32 connectionId; /* connection id to local backend */ int32 connectionId; /* connection id to local backend */
uint32 failureCount; /* number of task failures */ uint32 failureCount; /* number of task failures */
} WorkerTask; } WorkerTask;
@ -97,6 +95,7 @@ typedef struct WorkerTasksSharedStateData
{ {
/* Hash table shared by the task tracker and task tracker protocol functions */ /* Hash table shared by the task tracker and task tracker protocol functions */
HTAB *taskHash; HTAB *taskHash;
/* Lock protecting workerNodesHash */ /* Lock protecting workerNodesHash */
LWLock *taskHashLock; LWLock *taskHashLock;
} WorkerTasksSharedStateData; } WorkerTasksSharedStateData;

View File

@ -10,8 +10,8 @@
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef CITUSDB_TEST_HELPER_FUNCTIONS_H #ifndef CITUS_TEST_HELPER_FUNCTIONS_H
#define CITUSDB_TEST_HELPER_FUNCTIONS_H #define CITUS_TEST_HELPER_FUNCTIONS_H
#include "postgres.h" #include "postgres.h"
#include "c.h" #include "c.h"
@ -70,4 +70,4 @@ extern Datum prune_using_both_values(PG_FUNCTION_ARGS);
extern Datum debug_equality_expression(PG_FUNCTION_ARGS); extern Datum debug_equality_expression(PG_FUNCTION_ARGS);
#endif /* CITUSDB_TEST_HELPER_FUNCTIONS_H */ #endif /* CITUS_TEST_HELPER_FUNCTIONS_H */

View File

@ -23,7 +23,7 @@
/* Maximum length of worker port number (represented as string) */ /* Maximum length of worker port number (represented as string) */
#define MAX_PORT_LENGTH 10 #define MAX_PORT_LENGTH 10
/* default filename for citusdb.worker_list_file */ /* default filename for citus.worker_list_file */
#define WORKER_LIST_FILENAME "pg_worker_list.conf" #define WORKER_LIST_FILENAME "pg_worker_list.conf"
/* Implementation specific definitions used in finding worker nodes */ /* Implementation specific definitions used in finding worker nodes */
@ -43,12 +43,11 @@
*/ */
typedef struct WorkerNode typedef struct WorkerNode
{ {
uint32 workerPort; /* node's port; part of hash table key */ uint32 workerPort; /* node's port; part of hash table key */
char workerName[WORKER_LENGTH]; /* node's name; part of hash table key */ char workerName[WORKER_LENGTH]; /* node's name; part of hash table key */
char workerRack[WORKER_LENGTH]; /* node's network location */ char workerRack[WORKER_LENGTH]; /* node's network location */
bool inWorkerFile; /* is node in current membership file? */ bool inWorkerFile; /* is node in current membership file? */
} WorkerNode; } WorkerNode;

View File

@ -64,8 +64,7 @@ typedef struct RangePartitionContext
{ {
FmgrInfo *comparisonFunction; FmgrInfo *comparisonFunction;
Datum *splitPointArray; Datum *splitPointArray;
int32 splitPointCount; int32 splitPointCount;
} RangePartitionContext; } RangePartitionContext;
@ -77,7 +76,6 @@ typedef struct HashPartitionContext
{ {
FmgrInfo *hashFunction; FmgrInfo *hashFunction;
uint32 partitionCount; uint32 partitionCount;
} HashPartitionContext; } HashPartitionContext;
@ -88,16 +86,16 @@ typedef struct HashPartitionContext
*/ */
typedef struct PartialCopyStateData typedef struct PartialCopyStateData
{ {
StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for
* dest == COPY_NEW_FE in COPY FROM */ * dest == COPY_NEW_FE in COPY FROM */
int file_encoding; /* file or remote side's character encoding */ int file_encoding; /* file or remote side's character encoding */
bool need_transcoding; /* file encoding diff from server? */ bool need_transcoding; /* file encoding diff from server? */
bool binary; /* binary format? */ bool binary; /* binary format? */
char *null_print; /* NULL marker string (server encoding!) */ char *null_print; /* NULL marker string (server encoding!) */
char *null_print_client; /* same converted to file encoding */ char *null_print_client; /* same converted to file encoding */
char *delim; /* column delimiter (must be 1 byte) */ char *delim; /* column delimiter (must be 1 byte) */
MemoryContext rowcontext; /* per-row evaluation context */ MemoryContext rowcontext; /* per-row evaluation context */
} PartialCopyStateData; } PartialCopyStateData;
typedef struct PartialCopyStateData *PartialCopyState; typedef struct PartialCopyStateData *PartialCopyState;
@ -114,7 +112,6 @@ typedef struct FileOutputStream
File fileDescriptor; File fileDescriptor;
StringInfo fileBuffer; StringInfo fileBuffer;
StringInfo filePath; StringInfo filePath;
} FileOutputStream; } FileOutputStream;

View File

@ -1,9 +1,9 @@
# Makefile for tests of the CitusDB extension # Makefile for tests of the Citus extension
citusdb_subdir = src/test/regress citus_subdir = src/test/regress
citusdb_top_builddir = ../../.. citus_top_builddir = ../../..
include $(citusdb_top_builddir)/Makefile.global include $(citus_top_builddir)/Makefile.global
# ensure MAJORVERSION is defined (missing in older versions) # ensure MAJORVERSION is defined (missing in older versions)
ifndef MAJORVERSION ifndef MAJORVERSION
@ -11,11 +11,11 @@ MAJORVERSION := $(basename $(VERSION))
endif endif
## ##
## CitusDB regression support ## Citus regression support
## ##
MULTI_INSTALLDIR=$(CURDIR)/tmp_check/install MULTI_INSTALLDIR=$(CURDIR)/tmp_check/install
pg_regress_multi_check = $(PERL) $(citusdb_abs_srcdir)/pg_regress_multi.pl --pgxsdir="$(pgxsdir)" --bindir="$(bindir)" --libdir="$(libdir)" --majorversion="$(MAJORVERSION)" pg_regress_multi_check = $(PERL) $(citus_abs_srcdir)/pg_regress_multi.pl --pgxsdir="$(pgxsdir)" --bindir="$(bindir)" --libdir="$(libdir)" --majorversion="$(MAJORVERSION)"
MULTI_REGRESS_OPTS = --inputdir=$(citusdb_abs_srcdir) $(pg_regress_locale_flags) MULTI_REGRESS_OPTS = --inputdir=$(citus_abs_srcdir) $(pg_regress_locale_flags)
# XXX: Can't actually do useful testruns against install - $libdir # XXX: Can't actually do useful testruns against install - $libdir
# etc will point to the directory configured during postgres' # etc will point to the directory configured during postgres'
@ -26,12 +26,12 @@ cleandir-main:
### ###
tempinstall-main: cleandir-main tempinstall-main: cleandir-main
#### mkdir -p $(MULTI_INSTALLDIR) #### mkdir -p $(MULTI_INSTALLDIR)
### $(MAKE) DESTDIR=$(MULTI_INSTALLDIR) -C $(citusdb_top_builddir) install > tmp_check/install.log 2>&1 ### $(MAKE) DESTDIR=$(MULTI_INSTALLDIR) -C $(citus_top_builddir) install > tmp_check/install.log 2>&1
# Test input and expected files. These are created by pg_regress itself, so we # Test input and expected files. These are created by pg_regress itself, so we
# don't have a rule to create them. We do need rules to clean them however. # don't have a rule to create them. We do need rules to clean them however.
input_files := $(patsubst $(citusdb_abs_srcdir)/input/%.source,sql/%.sql, $(wildcard $(citusdb_abs_srcdir)/input/*.source)) input_files := $(patsubst $(citus_abs_srcdir)/input/%.source,sql/%.sql, $(wildcard $(citus_abs_srcdir)/input/*.source))
output_files := $(patsubst $(citusdb_abs_srcdir)/output/%.source,expected/%.out, $(wildcard $(citusdb_abs_srcdir)/output/*.source)) output_files := $(patsubst $(citus_abs_srcdir)/output/%.source,expected/%.out, $(wildcard $(citus_abs_srcdir)/output/*.source))
# have make check actually run all tests, but keep check-full as an # have make check actually run all tests, but keep check-full as an
# intermediate, for muscle memory backward compatibility. # intermediate, for muscle memory backward compatibility.
@ -42,32 +42,32 @@ check-full: check-multi check-multi-task-tracker check-multi-binary check-worker
# using pg_regress_multi_check unnecessarily starts up multiple nodes, which isn't needed # using pg_regress_multi_check unnecessarily starts up multiple nodes, which isn't needed
# for check-worker. But that's harmless besides a few cycles. # for check-worker. But that's harmless besides a few cycles.
check-worker: all check-worker: all
$(pg_regress_multi_check) --load-extension=citusdb \ $(pg_regress_multi_check) --load-extension=citus \
-- $(MULTI_REGRESS_OPTS) --schedule=$(citusdb_abs_srcdir)/worker_schedule $(EXTRA_TESTS) -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/worker_schedule $(EXTRA_TESTS)
check-multi: all tempinstall-main check-multi: all tempinstall-main
$(pg_regress_multi_check) --load-extension=citusdb \ $(pg_regress_multi_check) --load-extension=citus \
-- $(MULTI_REGRESS_OPTS) --schedule=$(citusdb_abs_srcdir)/multi_schedule $(EXTRA_TESTS) -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_schedule $(EXTRA_TESTS)
check-multi-fdw: all tempinstall-main check-multi-fdw: all tempinstall-main
$(pg_regress_multi_check) --load-extension=citusdb --load-extension=file_fdw -- \ $(pg_regress_multi_check) --load-extension=citus --load-extension=file_fdw -- \
$(MULTI_REGRESS_OPTS) --schedule=$(citusdb_abs_srcdir)/multi_fdw_schedule $(EXTRA_TESTS) $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_fdw_schedule $(EXTRA_TESTS)
check-multi-hll: all tempinstall-main check-multi-hll: all tempinstall-main
$(pg_regress_multi_check) --load-extension=citusdb --load-extension=hll -- \ $(pg_regress_multi_check) --load-extension=citus --load-extension=hll -- \
$(MULTI_REGRESS_OPTS) $(EXTRA_TESTS) multi_create_table multi_master_protocol multi_stage_data multi_agg_approximate_distinct $(MULTI_REGRESS_OPTS) $(EXTRA_TESTS) multi_create_table multi_master_protocol multi_stage_data multi_agg_approximate_distinct
check-multi-task-tracker: all tempinstall-main check-multi-task-tracker: all tempinstall-main
$(pg_regress_multi_check) --load-extension=citusdb \ $(pg_regress_multi_check) --load-extension=citus \
--server-option=citusdb.task_executor_type=task-tracker \ --server-option=citus.task_executor_type=task-tracker \
--server-option=citusdb.task_tracker_delay=50ms \ --server-option=citus.task_tracker_delay=50ms \
--server-option=citusdb.large_table_shard_count=1 \ --server-option=citus.large_table_shard_count=1 \
-- $(MULTI_REGRESS_OPTS) --schedule=$(citusdb_abs_srcdir)/multi_schedule $(EXTRA_TESTS) -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_schedule $(EXTRA_TESTS)
check-multi-binary: all tempinstall-main check-multi-binary: all tempinstall-main
$(pg_regress_multi_check) --load-extension=citusdb \ $(pg_regress_multi_check) --load-extension=citus \
--server-option=citusdb.binary_worker_copy_format=on \ --server-option=citus.binary_worker_copy_format=on \
-- $(MULTI_REGRESS_OPTS) --schedule=$(citusdb_abs_srcdir)/multi_schedule $(EXTRA_TESTS) -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_schedule $(EXTRA_TESTS)
clean distclean maintainer-clean: clean distclean maintainer-clean:
rm -f $(output_files) $(input_files) rm -f $(output_files) $(input_files)

View File

@ -7,14 +7,14 @@ ERROR: cannot compute aggregate (distinct)
DETAIL: table partitioning is unsuitable for aggregate (distinct) DETAIL: table partitioning is unsuitable for aggregate (distinct)
HINT: You can load the hll extension from contrib packages and enable distinct approximations. HINT: You can load the hll extension from contrib packages and enable distinct approximations.
-- Check approximate count(distinct) at different precisions / error rates -- Check approximate count(distinct) at different precisions / error rates
SET citusdb.count_distinct_error_rate = 0.1; SET citus.count_distinct_error_rate = 0.1;
SELECT count(distinct l_orderkey) FROM lineitem; SELECT count(distinct l_orderkey) FROM lineitem;
count count
------- -------
2612 2612
(1 row) (1 row)
SET citusdb.count_distinct_error_rate = 0.01; SET citus.count_distinct_error_rate = 0.01;
SELECT count(distinct l_orderkey) FROM lineitem; SELECT count(distinct l_orderkey) FROM lineitem;
count count
------- -------
@ -102,7 +102,7 @@ SELECT count(DISTINCT l_orderkey) as distinct_order_count, l_quantity FROM linei
-- If we have an order by on count(distinct) that we intend to push down to -- If we have an order by on count(distinct) that we intend to push down to
-- worker nodes, we need to error out. Otherwise, we are fine. -- worker nodes, we need to error out. Otherwise, we are fine.
SET citusdb.limit_clause_row_fetch_count = 1000; SET citus.limit_clause_row_fetch_count = 1000;
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
FROM lineitem FROM lineitem
GROUP BY l_returnflag GROUP BY l_returnflag
@ -123,7 +123,7 @@ SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as t
(3 rows) (3 rows)
-- Check that we can revert config and disable count(distinct) approximations -- Check that we can revert config and disable count(distinct) approximations
SET citusdb.count_distinct_error_rate = 0.0; SET citus.count_distinct_error_rate = 0.0;
SELECT count(distinct l_orderkey) FROM lineitem; SELECT count(distinct l_orderkey) FROM lineitem;
ERROR: cannot compute aggregate (distinct) ERROR: cannot compute aggregate (distinct)
DETAIL: table partitioning is unsuitable for aggregate (distinct) DETAIL: table partitioning is unsuitable for aggregate (distinct)

View File

@ -96,10 +96,10 @@ SELECT l_quantity, count(*), avg(l_extendedprice), array_agg(l_orderkey) FROM li
GROUP BY l_quantity ORDER BY l_quantity; GROUP BY l_quantity ORDER BY l_quantity;
l_quantity | count | avg | array_agg l_quantity | count | avg | array_agg
------------+-------+-----------------------+-------------------------------------------------------------------------------------------------- ------------+-------+-----------------------+--------------------------------------------------------------------------------------------------
1.00 | 17 | 1477.1258823529411765 | {5543,5633,5634,5698,5766,5856,5857,5986,8997,9026,9158,9184,9220,9222,9348,9383,9476} 1.00 | 17 | 1477.1258823529411765 | {8997,9026,9158,9184,9220,9222,9348,9383,9476,5543,5633,5634,5698,5766,5856,5857,5986}
2.00 | 19 | 3078.4242105263157895 | {5506,5540,5573,5669,5703,5730,5798,5831,5893,5920,5923,9030,9058,9123,9124,9188,9344,9441,9476} 2.00 | 19 | 3078.4242105263157895 | {9030,9058,9123,9124,9188,9344,9441,9476,5506,5540,5573,5669,5703,5730,5798,5831,5893,5920,5923}
3.00 | 14 | 4714.0392857142857143 | {5509,5543,5605,5606,5827,9124,9157,9184,9223,9254,9349,9414,9475,9477} 3.00 | 14 | 4714.0392857142857143 | {9124,9157,9184,9223,9254,9349,9414,9475,9477,5509,5543,5605,5606,5827}
4.00 | 19 | 5929.7136842105263158 | {5504,5507,5508,5511,5538,5764,5766,5826,5829,5862,5959,5985,9091,9120,9281,9347,9382,9440,9473} 4.00 | 19 | 5929.7136842105263158 | {9091,9120,9281,9347,9382,9440,9473,5504,5507,5508,5511,5538,5764,5766,5826,5829,5862,5959,5985}
(4 rows) (4 rows)
SELECT l_quantity, array_agg(extract (month FROM o_orderdate)) AS my_month SELECT l_quantity, array_agg(extract (month FROM o_orderdate)) AS my_month
@ -107,10 +107,10 @@ SELECT l_quantity, array_agg(extract (month FROM o_orderdate)) AS my_month
AND l_orderkey > 5500 AND l_orderkey < 9500 GROUP BY l_quantity ORDER BY l_quantity; AND l_orderkey > 5500 AND l_orderkey < 9500 GROUP BY l_quantity ORDER BY l_quantity;
l_quantity | my_month l_quantity | my_month
------------+------------------------------------------------ ------------+------------------------------------------------
1.00 | {9,5,7,5,9,11,11,4,7,7,4,7,4,2,6,3,5} 1.00 | {7,7,4,7,4,2,6,3,5,9,5,7,5,9,11,11,4}
2.00 | {11,10,8,5,5,12,3,11,7,11,5,7,6,6,10,1,12,6,5} 2.00 | {7,6,6,10,1,12,6,5,11,10,8,5,5,12,3,11,7,11,5}
3.00 | {4,9,8,11,7,10,6,7,8,5,8,9,11,3} 3.00 | {10,6,7,8,5,8,9,11,3,4,9,8,11,7}
4.00 | {1,5,6,11,12,10,9,6,1,2,5,1,11,6,2,8,2,6,10} 4.00 | {11,6,2,8,2,6,10,1,5,6,11,12,10,9,6,1,2,5,1}
(4 rows) (4 rows)
SELECT l_quantity, array_agg(l_orderkey * 2 + 1) FROM lineitem WHERE l_quantity < 5 SELECT l_quantity, array_agg(l_orderkey * 2 + 1) FROM lineitem WHERE l_quantity < 5
@ -118,10 +118,10 @@ SELECT l_quantity, array_agg(l_orderkey * 2 + 1) FROM lineitem WHERE l_quantity
AND l_orderkey > 5500 AND l_orderkey < 9500 GROUP BY l_quantity ORDER BY l_quantity; AND l_orderkey > 5500 AND l_orderkey < 9500 GROUP BY l_quantity ORDER BY l_quantity;
l_quantity | array_agg l_quantity | array_agg
------------+--------------------------------------------- ------------+---------------------------------------------
1.00 | {11269,11397,11713,11715,11973,18317,18445} 1.00 | {18317,18445,11269,11397,11713,11715,11973}
2.00 | {11847,18061,18247,18953} 2.00 | {18061,18247,18953,11847}
3.00 | {18249,18315,18699,18951,18955} 3.00 | {18249,18315,18699,18951,18955}
4.00 | {11653,11659,18241,18765} 4.00 | {18241,18765,11653,11659}
(4 rows) (4 rows)
-- Check that we can execute array_agg() with an expression containing NULL values -- Check that we can execute array_agg() with an expression containing NULL values

View File

@ -2,8 +2,8 @@
-- MULTI_BINARY_MASTER_COPY -- MULTI_BINARY_MASTER_COPY
-- --
-- Try binary master copy for different executors -- Try binary master copy for different executors
SET citusdb.binary_master_copy_format TO 'on'; SET citus.binary_master_copy_format TO 'on';
SET citusdb.task_executor_type TO 'task-tracker'; SET citus.task_executor_type TO 'task-tracker';
SELECT count(*) FROM lineitem; SELECT count(*) FROM lineitem;
count count
------- -------
@ -17,7 +17,7 @@ SELECT l_shipmode FROM lineitem WHERE l_partkey = 67310 OR l_partkey = 155190;
MAIL MAIL
(2 rows) (2 rows)
SET citusdb.task_executor_type TO 'real-time'; SET citus.task_executor_type TO 'real-time';
SELECT count(*) FROM lineitem; SELECT count(*) FROM lineitem;
count count
------- -------

Some files were not shown because too many files have changed in this diff Show More