diff --git a/.circleci/config.yml b/.circleci/config.yml index ed890b951..d5eadd94f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-vc4b1573' + default: '-v087ecd7' pg13_version: type: string default: '13.10' @@ -201,6 +201,9 @@ jobs: - run: name: 'Check if all GUCs are sorted alphabetically' command: ci/check_gucs_are_alphabetically_sorted.sh + - run: + name: 'Check for missing downgrade scripts' + command: ci/check_migration_files.sh check-sql-snapshots: docker: @@ -266,6 +269,41 @@ jobs: - coverage: flags: 'test_<< parameters.old_pg_major >>_<< parameters.new_pg_major >>,upgrade' + test-pytest: + description: Runs pytest based tests + parameters: + pg_major: + description: 'postgres major version' + type: integer + image: + description: 'docker image to use as for the tests' + type: string + default: citus/failtester + image_tag: + description: 'docker image tag to use' + type: string + docker: + - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - install_extension: + pg_major: << parameters.pg_major >> + - configure + - enable_core + - run: + name: 'Run pytest' + command: | + gosu circleci \ + make -C src/test/regress check-pytest + no_output_timeout: 2m + - stack_trace + - coverage: + flags: 'test_<< parameters.pg_major >>,pytest' + + test-arbitrary-configs: description: Runs tests on arbitrary configs parallelism: 6 @@ -452,6 +490,10 @@ jobs: pg_major: << parameters.pg_major >> - configure - enable_core + - run: + name: 'Install DBI.pm' + command: | + apt-get update && apt-get install libdbi-perl && apt-get install libdbd-pg-perl - run: name: 'Run Test' command: | @@ -551,7 +593,7 @@ jobs: testForDebugging="<< parameters.test >>" if [ -z "$testForDebugging" ]; then - detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*.sql\|src/test/regress/spec/.*.spec' || true)) + detected_changes=$(git diff origin/main... 
--name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*\.sql\|src/test/regress/spec/.*\.spec\|src/test/regress/citus_tests/test/test_.*\.py' || true)) tests=${detected_changes} else tests=$testForDebugging; @@ -854,38 +896,30 @@ workflows: image: citus/failtester make: check-failure - - tap-test-citus: &tap-test-citus-13 - name: 'test-13_tap-recovery' - suite: recovery + - test-pytest: + name: 'test-13_pytest' pg_major: 13 image_tag: '<< pipeline.parameters.pg13_version >>' requires: [build-13] - - tap-test-citus: - <<: *tap-test-citus-13 - name: 'test-13_tap-columnar-freezing' - suite: columnar_freezing - - tap-test-citus: &tap-test-citus-14 - name: 'test-14_tap-recovery' - suite: recovery + - test-pytest: + name: 'test-14_pytest' pg_major: 14 image_tag: '<< pipeline.parameters.pg14_version >>' requires: [build-14] - - tap-test-citus: - <<: *tap-test-citus-14 - name: 'test-14_tap-columnar-freezing' - suite: columnar_freezing - - tap-test-citus: &tap-test-citus-15 - name: 'test-15_tap-recovery' - suite: recovery + - test-pytest: + name: 'test-15_pytest' pg_major: 15 image_tag: '<< pipeline.parameters.pg15_version >>' requires: [build-15] + - tap-test-citus: - <<: *tap-test-citus-15 - name: 'test-15_tap-columnar-freezing' - suite: columnar_freezing + name: 'test-15_tap-cdc' + suite: cdc + pg_major: 15 + image_tag: '<< pipeline.parameters.pg15_version >>' + requires: [build-15] - test-arbitrary-configs: name: 'test-13_check-arbitrary-configs' @@ -936,8 +970,6 @@ workflows: - test-13_check-follower-cluster - test-13_check-columnar - test-13_check-columnar-isolation - - test-13_tap-recovery - - test-13_tap-columnar-freezing - test-13_check-failure - test-13_check-enterprise - test-13_check-enterprise-isolation @@ -956,8 +988,6 @@ workflows: - test-14_check-follower-cluster - test-14_check-columnar - test-14_check-columnar-isolation - - test-14_tap-recovery - - test-14_tap-columnar-freezing - test-14_check-failure - test-14_check-enterprise - test-14_check-enterprise-isolation @@ -976,8 +1006,6 @@ workflows: - test-15_check-follower-cluster - test-15_check-columnar - test-15_check-columnar-isolation - - test-15_tap-recovery - - test-15_tap-columnar-freezing - test-15_check-failure - test-15_check-enterprise - test-15_check-enterprise-isolation diff --git a/.editorconfig b/.editorconfig index 8091a1f57..698e94d2b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -17,7 +17,7 @@ trim_trailing_whitespace = true insert_final_newline = unset trim_trailing_whitespace = unset -[*.{sql,sh,py}] +[*.{sql,sh,py,toml}] indent_style = space indent_size = 4 tab_width = 4 diff --git a/.flake8 b/.flake8 index 112224148..18feeb500 100644 --- a/.flake8 +++ b/.flake8 @@ -1,7 +1,6 @@ [flake8] # E203 is ignored for black -# E402 is ignored because of te way we do relative imports -extend-ignore = E203, E402 +extend-ignore = E203 # black will truncate to 88 characters usually, but long string literals it # might keep. That's fine in most cases unless it gets really excessive. 
max-line-length = 150 diff --git a/.github/workflows/packaging-test-pipelines.yml b/.github/workflows/packaging-test-pipelines.yml index ae8d9d725..aecf8876c 100644 --- a/.github/workflows/packaging-test-pipelines.yml +++ b/.github/workflows/packaging-test-pipelines.yml @@ -157,7 +157,6 @@ jobs: apt-get update -y ## Install required packages to execute packaging tools for deb based distros - apt install python3-dev python3-pip -y - sudo apt-get purge -y python3-yaml - python3 -m pip install --upgrade pip setuptools==57.5.0 + apt-get install python3-dev python3-pip -y + apt-get purge -y python3-yaml ./.github/packaging/validate_build_output.sh "deb" diff --git a/ci/README.md b/ci/README.md index f4dde9cc3..37ef94f4f 100644 --- a/ci/README.md +++ b/ci/README.md @@ -283,6 +283,14 @@ actually run in CI. This is most commonly forgotten for newly added CI tests that the developer only ran locally. It also checks that all CI scripts have a section in this `README.md` file and that they include `ci/ci_helpers.sh`. +## `check_migration_files.sh` + +A branch that touches a set of upgrade scripts is also expected to touch +the corresponding downgrade scripts. If this script fails, read the output +and make sure you update the downgrade scripts in the printed list. If you +really don't need a downgrade to run any SQL, you can write a comment in the +file explaining why a downgrade step is not necessary. + ## `disallow_c_comments_in_migrations.sh` We do not use C-style comments in migration files as the stripped diff --git a/ci/check_migration_files.sh b/ci/check_migration_files.sh new file mode 100755 index 000000000..61fc59540 --- /dev/null +++ b/ci/check_migration_files.sh @@ -0,0 +1,33 @@ +#! /bin/bash + +set -euo pipefail +# shellcheck disable=SC1091 +source ci/ci_helpers.sh + +# This file checks for the existence of downgrade scripts for every upgrade script that is changed in the branch.
+ +# create list of migration files for upgrades +upgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/citus--.*sql" || exit 0 ; }) +downgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/downgrades/citus--.*sql" || exit 0 ; }) +ret_value=0 + +for file in $upgrade_files +do + # There should always be 2 matches, and no need to avoid splitting here + # shellcheck disable=SC2207 + versions=($(grep --only-matching --extended-regexp "[0-9]+\.[0-9]+[-.][0-9]+" <<< "$file")) + + from_version=${versions[0]}; + to_version=${versions[1]}; + + downgrade_migration_file="src/backend/distributed/sql/downgrades/citus--$to_version--$from_version.sql" + + # check for the existence of migration scripts + if [[ $(grep --line-regexp --count "$downgrade_migration_file" <<< "$downgrade_files") == 0 ]] + then + echo "$file is updated, but $downgrade_migration_file is not updated in branch" + ret_value=1 + fi +done + +exit $ret_value; diff --git a/pyproject.toml b/pyproject.toml index a470b2d92..997fb3801 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,3 +3,35 @@ profile = 'black' [tool.black] include = '(src/test/regress/bin/diff-filter|\.pyi?|\.ipynb)$' + +[tool.pytest.ini_options] +addopts = [ + "--import-mode=importlib", + "--showlocals", + "--tb=short", +] +pythonpath = 'src/test/regress/citus_tests' +asyncio_mode = 'auto' + +# Make test discovery quicker from the root dir of the repo +testpaths = ['src/test/regress/citus_tests/test'] + +# Make test discovery quicker from other directories than root directory +norecursedirs = [ + '*.egg', + '.*', + 'build', + 'venv', + 'ci', + 'vendor', + 'backend', + 'bin', + 'include', + 'tmp_*', + 'results', + 'expected', + 'sql', + 'spec', + 'data', + '__pycache__', +] diff --git a/src/backend/columnar/.gitignore b/src/backend/columnar/.gitignore new file mode 100644 index 000000000..b70410d1d --- /dev/null +++ b/src/backend/columnar/.gitignore @@ -0,0 +1,3 @@ +# The directory used to store columnar sql files after pre-processing them +# with 'cpp' in build-time, see src/backend/columnar/Makefile. 
+/build/ diff --git a/src/backend/columnar/Makefile b/src/backend/columnar/Makefile index f9fa09b7c..ded52a98d 100644 --- a/src/backend/columnar/Makefile +++ b/src/backend/columnar/Makefile @@ -10,14 +10,51 @@ OBJS += \ MODULE_big = citus_columnar EXTENSION = citus_columnar -columnar_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql)) -columnar_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql)) -DATA = $(columnar_sql_files) \ - $(columnar_downgrade_sql_files) +template_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql)) +template_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/sql/downgrades/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql)) +generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_files)) +generated_downgrade_sql_files += $(patsubst %,$(citus_abs_srcdir)/build/sql/%,$(template_downgrade_sql_files)) + +DATA_built = $(generated_sql_files) PG_CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include include $(citus_top_builddir)/Makefile.global -.PHONY: install-all +SQL_DEPDIR=.deps/sql +SQL_BUILDDIR=build/sql + +$(generated_sql_files): $(citus_abs_srcdir)/build/%: % + @mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR) + @# -MF is used to store dependency files(.Po) in another directory for separation + @# -MT is used to change the target of the rule emitted by dependency generation. + @# -P is used to inhibit generation of linemarkers in the output from the preprocessor. + @# -undef is used to not predefine any system-specific or GCC-specific macros. + @# `man cpp` for further information + cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@ + +$(generated_downgrade_sql_files): $(citus_abs_srcdir)/build/sql/%: sql/downgrades/% + @mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR) + @# -MF is used to store dependency files(.Po) in another directory for separation + @# -MT is used to change the target of the rule emitted by dependency generation. + @# -P is used to inhibit generation of linemarkers in the output from the preprocessor. + @# -undef is used to not predefine any system-specific or GCC-specific macros. 
+ @# `man cpp` for further information + cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@ + +.PHONY: install install-downgrades install-all + +cleanup-before-install: + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar.control + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/columnar--* + rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar--* + +install: cleanup-before-install + +# install and install-downgrades should be run sequentially install-all: install + $(MAKE) install-downgrades + +install-downgrades: $(generated_downgrade_sql_files) + $(INSTALL_DATA) $(generated_downgrade_sql_files) '$(DESTDIR)$(datadir)/$(datamoduledir)/' + diff --git a/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql b/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql index 60a0401d5..89ccd9e74 100644 --- a/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql +++ b/src/backend/columnar/sql/citus_columnar--11.1-1--11.2-1.sql @@ -1 +1,19 @@ -- citus_columnar--11.1-1--11.2-1 + +#include "udfs/columnar_ensure_am_depends_catalog/11.2-1.sql" + +DELETE FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid + AND objid IN (select oid from pg_am where amname = 'columnar') + AND objsubid = 0 + AND refclassid = 'pg_class'::regclass::oid + AND refobjid IN ( + 'columnar_internal.stripe_first_row_number_idx'::regclass::oid, + 'columnar_internal.chunk_group_pkey'::regclass::oid, + 'columnar_internal.chunk_pkey'::regclass::oid, + 'columnar_internal.options_pkey'::regclass::oid, + 'columnar_internal.stripe_first_row_number_idx'::regclass::oid, + 'columnar_internal.stripe_pkey'::regclass::oid + ) + AND refobjsubid = 0 + AND deptype = 'n'; diff --git a/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql b/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql index 9acf68da3..c987bfa67 100644 --- a/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql +++ b/src/backend/columnar/sql/downgrades/citus_columnar--11.2-1--11.1-1.sql @@ -1 +1,4 @@ -- citus_columnar--11.2-1--11.1-1 + +-- Note that we intentionally do not re-insert the pg_depend records that we +-- deleted via citus_columnar--11.1-1--11.2-1.sql. diff --git a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql new file mode 100644 index 000000000..101db17fb --- /dev/null +++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/11.2-1.sql @@ -0,0 +1,43 @@ +CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog +AS $func$ +BEGIN + INSERT INTO pg_depend + WITH columnar_schema_members(relid) AS ( + SELECT pg_class.oid AS relid FROM pg_class + WHERE relnamespace = + COALESCE( + (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar_internal'), + (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar') + ) + AND relname IN ('chunk', + 'chunk_group', + 'options', + 'storageid_seq', + 'stripe') + ) + SELECT -- Define a dependency edge from "columnar table access method" .. + 'pg_am'::regclass::oid as classid, + (select oid from pg_am where amname = 'columnar') as objid, + 0 as objsubid, + -- ... to some objects registered as regclass and that lives in + -- "columnar" schema. That contains catalog tables and the sequences + -- created in "columnar" schema. 
+ -- + -- Given the possibility of user might have created their own objects + -- in columnar schema, we explicitly specify list of objects that we + -- are interested in. + 'pg_class'::regclass::oid as refclassid, + columnar_schema_members.relid as refobjid, + 0 as refobjsubid, + 'n' as deptype + FROM columnar_schema_members + -- Avoid inserting duplicate entries into pg_depend. + EXCEPT TABLE pg_depend; +END; +$func$; +COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() + IS 'internal function responsible for creating dependencies from columnar ' + 'table access method to the rel objects in columnar schema'; diff --git a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql index ade15390a..101db17fb 100644 --- a/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql +++ b/src/backend/columnar/sql/udfs/columnar_ensure_am_depends_catalog/latest.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog() +CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() RETURNS void LANGUAGE plpgsql SET search_path = pg_catalog @@ -14,22 +14,17 @@ BEGIN ) AND relname IN ('chunk', 'chunk_group', - 'chunk_group_pkey', - 'chunk_pkey', 'options', - 'options_pkey', 'storageid_seq', - 'stripe', - 'stripe_first_row_number_idx', - 'stripe_pkey') + 'stripe') ) SELECT -- Define a dependency edge from "columnar table access method" .. 'pg_am'::regclass::oid as classid, (select oid from pg_am where amname = 'columnar') as objid, 0 as objsubid, - -- ... to each object that is registered to pg_class and that lives - -- in "columnar" schema. That contains catalog tables, indexes - -- created on them and the sequences created in "columnar" schema. + -- ... to some objects registered as regclass and that lives in + -- "columnar" schema. That contains catalog tables and the sequences + -- created in "columnar" schema. 
-- -- Given the possibility of user might have created their own objects -- in columnar schema, we explicitly specify list of objects that we @@ -43,6 +38,6 @@ BEGIN EXCEPT TABLE pg_depend; END; $func$; -COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog() +COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog() IS 'internal function responsible for creating dependencies from columnar ' 'table access method to the rel objects in columnar schema'; diff --git a/src/backend/distributed/Makefile b/src/backend/distributed/Makefile index a51c2a1f5..1cefb5769 100644 --- a/src/backend/distributed/Makefile +++ b/src/backend/distributed/Makefile @@ -32,7 +32,13 @@ OBJS += \ $(patsubst $(citus_abs_srcdir)/%.c,%.o,$(foreach dir,$(SUBDIRS), $(sort $(wildcard $(citus_abs_srcdir)/$(dir)/*.c)))) # be explicit about the default target -all: +.PHONY: cdc + +all: cdc + +cdc: + echo "running cdc make" + $(MAKE) DECODER=pgoutput -C cdc all NO_PGXS = 1 @@ -81,11 +87,19 @@ endif .PHONY: clean-full install install-downgrades install-all +clean: clean-cdc + +clean-cdc: + $(MAKE) DECODER=pgoutput -C cdc clean + cleanup-before-install: rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus.control rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus--* -install: cleanup-before-install +install: cleanup-before-install install-cdc + +install-cdc: + $(MAKE) DECODER=pgoutput -C cdc install # install and install-downgrades should be run sequentially install-all: install @@ -96,4 +110,5 @@ install-downgrades: $(generated_downgrade_sql_files) clean-full: $(MAKE) clean + $(MAKE) -C cdc clean-full rm -rf $(safestringlib_builddir) diff --git a/src/backend/distributed/cdc/Makefile b/src/backend/distributed/cdc/Makefile new file mode 100644 index 000000000..76aa28726 --- /dev/null +++ b/src/backend/distributed/cdc/Makefile @@ -0,0 +1,26 @@ +ifndef DECODER + DECODER = pgoutput +endif + +MODULE_big = citus_$(DECODER) +citus_subdir = src/backend/distributed/cdc +citus_top_builddir = ../../../.. +citus_decoders_dir = $(DESTDIR)$(pkglibdir)/citus_decoders + +OBJS += cdc_decoder.o cdc_decoder_utils.o + +include $(citus_top_builddir)/Makefile.global + +override CFLAGS += -DDECODER=\"$(DECODER)\" -I$(citus_abs_top_srcdir)/include +override CPPFLAGS += -DDECODER=\"$(DECODER)\" -I$(citus_abs_top_srcdir)/include + +install: install-cdc + +clean: clean-cdc + +install-cdc: + mkdir -p '$(citus_decoders_dir)' + $(INSTALL_SHLIB) citus_$(DECODER).so '$(citus_decoders_dir)/$(DECODER).so' + +clean-cdc: + rm -f '$(DESTDIR)$(datadir)/$(datamoduledir)/citus_decoders/$(DECODER).so' diff --git a/src/backend/distributed/cdc/cdc_decoder.c b/src/backend/distributed/cdc/cdc_decoder.c new file mode 100644 index 000000000..9dfb8bc12 --- /dev/null +++ b/src/backend/distributed/cdc/cdc_decoder.c @@ -0,0 +1,500 @@ +/*------------------------------------------------------------------------- + * + * cdc_decoder.c + * CDC Decoder plugin for Citus + * + * Copyright (c) Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#include "cdc_decoder_utils.h" +#include "postgres.h" +#include "fmgr.h" + +#include "access/genam.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_publication.h" +#include "commands/extension.h" +#include "common/hashfn.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/typcache.h" + +PG_MODULE_MAGIC; + +extern void _PG_output_plugin_init(OutputPluginCallbacks *cb); +static LogicalDecodeChangeCB ouputPluginChangeCB; + +static void InitShardToDistributedTableMap(void); + +static void PublishDistributedTableChanges(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + Relation relation, + ReorderBufferChange *change); + + +static bool replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId + origin_id); + +static void TranslateChangesIfSchemaChanged(Relation relation, Relation targetRelation, + ReorderBufferChange *change); + +static void TranslateAndPublishRelationForCDC(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + Relation relation, + ReorderBufferChange *change, Oid shardId, + Oid targetRelationid); + +typedef struct +{ + uint64 shardId; + Oid distributedTableId; + bool isReferenceTable; + bool isNull; +} ShardIdHashEntry; + +static HTAB *shardToDistributedTableMap = NULL; + +static void cdc_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change); + + +/* build time macro for base decoder plugin name for CDC and Shard Split. */ +#ifndef DECODER +#define DECODER "pgoutput" +#endif + +#define DECODER_INIT_FUNCTION_NAME "_PG_output_plugin_init" + +#define CITUS_SHARD_TRANSFER_SLOT_PREFIX "citus_shard_" +#define CITUS_SHARD_TRANSFER_SLOT_PREFIX_SIZE (sizeof(CITUS_SHARD_TRANSFER_SLOT_PREFIX) - \ + 1) + +/* + * Postgres uses 'pgoutput' as default plugin for logical replication. + * We want to reuse Postgres pgoutput's functionality as much as possible. + * Hence we load all the functions of this plugin and override as required. + */ +void +_PG_output_plugin_init(OutputPluginCallbacks *cb) +{ + elog(LOG, "Initializing CDC decoder"); + + /* + * We build custom .so files whose name matches common decoders (pgoutput, wal2json) + * and place them in $libdir/citus_decoders/ such that administrators can configure + * dynamic_library_path to include this directory, and users can then use the + * regular decoder names when creating replications slots. + * + * To load the original decoder, we need to remove citus_decoders/ from the + * dynamic_library_path. 
*/ + char *originalDLP = Dynamic_library_path; + Dynamic_library_path = RemoveCitusDecodersFromPaths(Dynamic_library_path); + + LogicalOutputPluginInit plugin_init = + (LogicalOutputPluginInit) (void *) + load_external_function(DECODER, + DECODER_INIT_FUNCTION_NAME, + false, NULL); + + if (plugin_init == NULL) + { + elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol"); + } + + /* in case this session is used for different replication slots */ + Dynamic_library_path = originalDLP; + + /* ask the output plugin to fill the callback struct */ + plugin_init(cb); + + /* Initialize the Shard Id to Distributed Table id mapping hash table.*/ + InitShardToDistributedTableMap(); + + /* actual pgoutput callback function will be called */ + ouputPluginChangeCB = cb->change_cb; + cb->change_cb = cdc_change_cb; + cb->filter_by_origin_cb = replication_origin_filter_cb; +} + + +/* + * Check if the replication slot is for shard transfer by checking its prefix. + */ +inline static +bool +IsShardTransferSlot(char *replicationSlotName) +{ + return strncmp(replicationSlotName, CITUS_SHARD_TRANSFER_SLOT_PREFIX, + CITUS_SHARD_TRANSFER_SLOT_PREFIX_SIZE) == 0; +} + + +/* + * cdc_change_cb function translates the incoming tuple change on a shard + * and publishes it as a change on the corresponding distributed table. + */ +static void +cdc_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change) +{ + /* + * If Citus has not been loaded yet, pass the changes + * through to the underlying decoder plugin. + */ + if (!CdcCitusHasBeenLoaded()) + { + ouputPluginChangeCB(ctx, txn, relation, change); + return; + } + + /* check if the relation is publishable.*/ + if (!is_publishable_relation(relation)) + { + return; + } + + char *replicationSlotName = ctx->slot->data.name.data; + if (replicationSlotName == NULL) + { + elog(ERROR, "Replication slot name is NULL!"); + return; + } + + /* If the slot is for internal shard operations, call the base plugin's callback. */ + if (IsShardTransferSlot(replicationSlotName)) + { + ouputPluginChangeCB(ctx, txn, relation, change); + return; + } + + /* Translate the changes from the shard to the distributed table and publish. */ + PublishDistributedTableChanges(ctx, txn, relation, change); +} + + +/* + * InitShardToDistributedTableMap initializes the hash table that is used to + * translate the changes in the shard table to the changes in the distributed table. + */ +static void +InitShardToDistributedTableMap() +{ + HASHCTL info; + memset(&info, 0, sizeof(info)); + info.keysize = sizeof(uint64); + info.entrysize = sizeof(ShardIdHashEntry); + info.hash = tag_hash; + info.hcxt = CurrentMemoryContext; + + int hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION); + shardToDistributedTableMap = hash_create("CDC Decoder translation hash table", 1024, + &info, hashFlags); +} + + +/* + * AddShardIdToHashTable adds the shardId to the hash table.
*/ +static Oid +AddShardIdToHashTable(uint64 shardId, ShardIdHashEntry *entry) +{ + entry->shardId = shardId; + entry->distributedTableId = CdcLookupShardRelationFromCatalog(shardId, true); + entry->isReferenceTable = CdcPartitionMethodViaCatalog(entry->distributedTableId) == + 'n'; + return entry->distributedTableId; +} + + +static Oid +LookupDistributedTableIdForShardId(uint64 shardId, bool *isReferenceTable) +{ + bool found; + Oid distributedTableId = InvalidOid; + ShardIdHashEntry *entry = (ShardIdHashEntry *) hash_search(shardToDistributedTableMap, + &shardId, + HASH_ENTER, + &found); + if (found) + { + distributedTableId = entry->distributedTableId; + } + else + { + distributedTableId = AddShardIdToHashTable(shardId, entry); + } + *isReferenceTable = entry->isReferenceTable; + return distributedTableId; +} + + +/* + * replication_origin_filter_cb is a callback function that filters out publication of changes + * that originated on any node other than the current node. This is + * identified by the "origin_id" of the changes. The origin_id is set to + * a non-zero value in the origin node as part of WAL replication for internal + * operations like shard split/moves/create_distributed_table etc. + */ +static bool +replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId origin_id) +{ + return (origin_id != InvalidRepOriginId); +} + + +/* + * TranslateAndPublishRelationForCDC is responsible for translating the changes in the shard table to + * the changes in the shell table and publishing the changes as a change to the + * distributed table so that CDC clients are not aware of the shard tables. It also + * handles schema changes to the distributed table. + */ +static void +TranslateAndPublishRelationForCDC(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change, Oid + shardId, Oid targetRelationid) +{ + /* Get the distributed table's relation for this shard.*/ + Relation targetRelation = RelationIdGetRelation(targetRelationid); + + /* + * Check if there has been a schema change (such as a dropped column), by comparing + * the number of attributes in the shard table and the shell table. + */ + TranslateChangesIfSchemaChanged(relation, targetRelation, change); + + /* + * Publish the change to the shard table as the change in the distributed table, + * so that the CDC client can see the change in the distributed table, + * instead of the shard table, by calling the pgoutput's callback function. + */ + ouputPluginChangeCB(ctx, txn, targetRelation, change); + RelationClose(targetRelation); +} + + +/* + * PublishDistributedTableChanges checks if the given relation is a shard of a distributed + * table. If so, it publishes the changes as the change for the distributed table instead + * of the shard. If not, it passes the changes through to the base plugin's callback. + * It also skips the catalog and Citus metadata tables. + */ +static void +PublishDistributedTableChanges(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change) +{ + char *shardRelationName = RelationGetRelationName(relation); + + /* Skip publishing CDC changes for any system relations in pg_catalog*/ + if (relation->rd_rel->relnamespace == PG_CATALOG_NAMESPACE) + { + return; + } + + /* Check if the relation is a distributed table by checking for shard name. */ + uint64 shardId = CdcExtractShardIdFromTableName(shardRelationName, true); + + /* If this relation is not distributed, call the pgoutput's callback and return.
*/ + if (shardId == INVALID_SHARD_ID) + { + ouputPluginChangeCB(ctx, txn, relation, change); + return; + } + + bool isReferenceTable = false; + Oid distRelationId = LookupDistributedTableIdForShardId(shardId, &isReferenceTable); + if (distRelationId == InvalidOid) + { + ouputPluginChangeCB(ctx, txn, relation, change); + return; + } + + /* Publish changes for reference table only from the coordinator node. */ + if (isReferenceTable && !CdcIsCoordinator()) + { + return; + } + + /* translate and publish from shard relation to distributed table relation for CDC. */ + TranslateAndPublishRelationForCDC(ctx, txn, relation, change, shardId, + distRelationId); +} + + +/* + * GetTupleForTargetSchemaForCdc returns a heap tuple with the data from sourceRelationTuple + * to match the schema in targetRelDesc. Either or both source and target relations may have + * dropped columns. This function handles it by adding NULL values for dropped columns in + * target relation and skipping dropped columns in source relation. It returns a heap tuple + * adjusted to the current schema of the target relation. + */ +static HeapTuple +GetTupleForTargetSchemaForCdc(HeapTuple sourceRelationTuple, + TupleDesc sourceRelDesc, + TupleDesc targetRelDesc) +{ + /* Allocate memory for sourceValues and sourceNulls arrays. */ + Datum *sourceValues = (Datum *) palloc0(sourceRelDesc->natts * sizeof(Datum)); + bool *sourceNulls = (bool *) palloc0(sourceRelDesc->natts * sizeof(bool)); + + /* Deform the source tuple to sourceValues and sourceNulls arrays. */ + heap_deform_tuple(sourceRelationTuple, sourceRelDesc, sourceValues, + sourceNulls); + + /* This is the next field to Read in the source relation */ + uint32 sourceIndex = 0; + uint32 targetIndex = 0; + + /* Allocate memory for sourceValues and sourceNulls arrays. */ + Datum *targetValues = (Datum *) palloc0(targetRelDesc->natts * sizeof(Datum)); + bool *targetNulls = (bool *) palloc0(targetRelDesc->natts * sizeof(bool)); + + /* Loop through all source and target attributes one by one and handle any dropped attributes.*/ + while (targetIndex < targetRelDesc->natts) + { + /* If this target attribute has been dropped, add a NULL attribute in targetValues and continue.*/ + if (TupleDescAttr(targetRelDesc, targetIndex)->attisdropped) + { + Datum nullDatum = (Datum) 0; + targetValues[targetIndex] = nullDatum; + targetNulls[targetIndex] = true; + targetIndex++; + } + /* If this source attribute has been dropped, just skip this source attribute.*/ + else if (TupleDescAttr(sourceRelDesc, sourceIndex)->attisdropped) + { + sourceIndex++; + continue; + } + /* If both source and target attributes are not dropped, add the attribute field to targetValues. */ + else if (sourceIndex < sourceRelDesc->natts) + { + targetValues[targetIndex] = sourceValues[sourceIndex]; + targetNulls[targetIndex] = sourceNulls[sourceIndex]; + sourceIndex++; + targetIndex++; + } + else + { + /* If there are no more source fields, add a NULL field in targetValues. */ + Datum nullDatum = (Datum) 0; + targetValues[targetIndex] = nullDatum; + targetNulls[targetIndex] = true; + targetIndex++; + } + } + + /* Form a new tuple from the target values created by the above loop. 
*/ + HeapTuple targetRelationTuple = heap_form_tuple(targetRelDesc, targetValues, + targetNulls); + return targetRelationTuple; +} + + +/* HasSchemaChanged function returns true if there are any schema changes between source and target relations. */ +static bool +HasSchemaChanged(TupleDesc sourceRelationDesc, TupleDesc targetRelationDesc) +{ + bool hasSchemaChanged = (sourceRelationDesc->natts != targetRelationDesc->natts); + if (hasSchemaChanged) + { + return true; + } + + for (uint32 i = 0; i < sourceRelationDesc->natts; i++) + { + if (TupleDescAttr(sourceRelationDesc, i)->attisdropped || + TupleDescAttr(targetRelationDesc, i)->attisdropped) + { + hasSchemaChanged = true; + break; + } + } + + return hasSchemaChanged; +} + + +/* + * TranslateChangesIfSchemaChanged translates the tuples in the given ReorderBufferChange + * if there is a schema change between source and target relations. + */ +static void +TranslateChangesIfSchemaChanged(Relation sourceRelation, Relation targetRelation, + ReorderBufferChange *change) +{ + TupleDesc sourceRelationDesc = RelationGetDescr(sourceRelation); + TupleDesc targetRelationDesc = RelationGetDescr(targetRelation); + + /* if there are no changes between source and target relations, return. */ + if (!HasSchemaChanged(sourceRelationDesc, targetRelationDesc)) + { + return; + } + + /* Check the ReorderBufferChange's action type and handle it accordingly.*/ + switch (change->action) + { + case REORDER_BUFFER_CHANGE_INSERT: + { + /* For insert action, only the new tuple needs to be translated. */ + HeapTuple sourceRelationNewTuple = &(change->data.tp.newtuple->tuple); + HeapTuple targetRelationNewTuple = GetTupleForTargetSchemaForCdc( + sourceRelationNewTuple, sourceRelationDesc, targetRelationDesc); + change->data.tp.newtuple->tuple = *targetRelationNewTuple; + break; + } + + /* + * For update changes both old and new tuples need to be translated for target relation + * if the REPLICA IDENTITY is set to FULL. Otherwise, only the new tuple needs to be + * translated for target relation. + */ + case REORDER_BUFFER_CHANGE_UPDATE: + { + /* For update action, the new tuple should always be translated. */ + /* Get the new tuple from the ReorderBufferChange, and translate it to target relation. */ + HeapTuple sourceRelationNewTuple = &(change->data.tp.newtuple->tuple); + HeapTuple targetRelationNewTuple = GetTupleForTargetSchemaForCdc( + sourceRelationNewTuple, sourceRelationDesc, targetRelationDesc); + change->data.tp.newtuple->tuple = *targetRelationNewTuple; + + /* + * Format oldtuple according to the target relation. If the column values of replica + * identity change, then the old tuple is non-null and needs to be formatted according + * to the target relation schema. + */ + if (change->data.tp.oldtuple != NULL) + { + HeapTuple sourceRelationOldTuple = &(change->data.tp.oldtuple->tuple); + HeapTuple targetRelationOldTuple = GetTupleForTargetSchemaForCdc( + sourceRelationOldTuple, + sourceRelationDesc, + targetRelationDesc); + + change->data.tp.oldtuple->tuple = *targetRelationOldTuple; + } + break; + } + + case REORDER_BUFFER_CHANGE_DELETE: + { + /* For delete action, only the old tuple needs to be translated. */ + HeapTuple sourceRelationOldTuple = &(change->data.tp.oldtuple->tuple); + HeapTuple targetRelationOldTuple = GetTupleForTargetSchemaForCdc( + sourceRelationOldTuple, + sourceRelationDesc, + targetRelationDesc); + + change->data.tp.oldtuple->tuple = *targetRelationOldTuple; + break; + } + + default: + { + /* Do nothing for other action types.
*/ + break; + } + } +} diff --git a/src/backend/distributed/cdc/cdc_decoder_utils.c b/src/backend/distributed/cdc/cdc_decoder_utils.c new file mode 100644 index 000000000..272221a5f --- /dev/null +++ b/src/backend/distributed/cdc/cdc_decoder_utils.c @@ -0,0 +1,432 @@ +/*------------------------------------------------------------------------- + * + * cdc_decoder_utils.c + * CDC Decoder plugin utility functions for Citus + * + * Copyright (c) Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "commands/extension.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "common/hashfn.h" +#include "common/string.h" +#include "utils/fmgroids.h" +#include "utils/typcache.h" +#include "utils/lsyscache.h" +#include "catalog/pg_namespace.h" +#include "cdc_decoder_utils.h" +#include "distributed/pg_dist_partition.h" +#include "distributed/pg_dist_shard.h" +#include "distributed/relay_utility.h" + +static int32 LocalGroupId = -1; +static Oid PgDistLocalGroupRelationId = InvalidOid; +static Oid PgDistShardRelationId = InvalidOid; +static Oid PgDistShardShardidIndexId = InvalidOid; +static Oid PgDistPartitionRelationId = InvalidOid; +static Oid PgDistPartitionLogicalrelidIndexId = InvalidOid; +static bool IsCitusExtensionLoaded = false; + +#define COORDINATOR_GROUP_ID 0 +#define InvalidRepOriginId 0 +#define Anum_pg_dist_local_groupid 1 +#define GROUP_ID_UPGRADING -2 + + +static Oid DistLocalGroupIdRelationId(void); +static int32 CdcGetLocalGroupId(void); +static HeapTuple CdcPgDistPartitionTupleViaCatalog(Oid relationId); + +/* + * DistLocalGroupIdRelationId returns the relation id of the pg_dist_local_group + */ +static Oid +DistLocalGroupIdRelationId(void) +{ + if (PgDistLocalGroupRelationId == InvalidOid) + { + PgDistLocalGroupRelationId = get_relname_relid("pg_dist_local_group", + PG_CATALOG_NAMESPACE); + } + return PgDistLocalGroupRelationId; +} + + +/* + * DistShardRelationId returns the relation id of the pg_dist_shard + */ +static Oid +DistShardRelationId(void) +{ + if (PgDistShardRelationId == InvalidOid) + { + PgDistShardRelationId = get_relname_relid("pg_dist_shard", PG_CATALOG_NAMESPACE); + } + return PgDistShardRelationId; +} + + +/* + * DistShardShardidIndexId returns the relation id of the pg_dist_shard_shardid_index + */ +static Oid +DistShardShardidIndexId(void) +{ + if (PgDistShardShardidIndexId == InvalidOid) + { + PgDistShardShardidIndexId = get_relname_relid("pg_dist_shard_shardid_index", + PG_CATALOG_NAMESPACE); + } + return PgDistShardShardidIndexId; +} + + +/* + * DistPartitionRelationId returns the relation id of the pg_dist_partition + */ +static Oid +DistPartitionRelationId(void) +{ + if (PgDistPartitionRelationId == InvalidOid) + { + PgDistPartitionRelationId = get_relname_relid("pg_dist_partition", + PG_CATALOG_NAMESPACE); + } + return PgDistPartitionRelationId; +} + + +static Oid +DistPartitionLogicalRelidIndexId(void) +{ + if (PgDistPartitionLogicalrelidIndexId == InvalidOid) + { + PgDistPartitionLogicalrelidIndexId = get_relname_relid( + "pg_dist_partition_logicalrelid_index", PG_CATALOG_NAMESPACE); + } + return PgDistPartitionLogicalrelidIndexId; +} + + +/* + * CdcIsCoordinator function returns true if this node is identified as the + * schema/coordinator/master node of the cluster.
+ */ +bool +CdcIsCoordinator(void) +{ + return (CdcGetLocalGroupId() == COORDINATOR_GROUP_ID); +} + + +/* + * CdcCitusHasBeenLoaded function returns true if the citus extension has been loaded. + */ +bool +CdcCitusHasBeenLoaded() +{ + if (!IsCitusExtensionLoaded) + { + IsCitusExtensionLoaded = (get_extension_oid("citus", true) != InvalidOid); + } + + return IsCitusExtensionLoaded; +} + + +/* + * ExtractShardIdFromTableName tries to extract shard id from the given table name, + * and returns the shard id if table name is formatted as shard name. + * Else, the function returns INVALID_SHARD_ID. + */ +uint64 +CdcExtractShardIdFromTableName(const char *tableName, bool missingOk) +{ + char *shardIdStringEnd = NULL; + + /* find the last underscore and increment for shardId string */ + char *shardIdString = strrchr(tableName, SHARD_NAME_SEPARATOR); + if (shardIdString == NULL && !missingOk) + { + ereport(ERROR, (errmsg("could not extract shardId from table name \"%s\"", + tableName))); + } + else if (shardIdString == NULL && missingOk) + { + return INVALID_SHARD_ID; + } + + shardIdString++; + + errno = 0; + uint64 shardId = strtoull(shardIdString, &shardIdStringEnd, 0); + + if (errno != 0 || (*shardIdStringEnd != '\0')) + { + if (!missingOk) + { + ereport(ERROR, (errmsg("could not extract shardId from table name \"%s\"", + tableName))); + } + else + { + return INVALID_SHARD_ID; + } + } + + return shardId; +} + + +/* + * CdcGetLocalGroupId returns the group identifier of the local node. The function assumes + * that pg_dist_local_node_group has exactly one row and has at least one column. + * Otherwise, the function errors out. + */ +static int32 +CdcGetLocalGroupId(void) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + int32 groupId = 0; + + /* + * Already set the group id, no need to read the heap again. + */ + if (LocalGroupId != -1) + { + return LocalGroupId; + } + + Oid localGroupTableOid = DistLocalGroupIdRelationId(); + if (localGroupTableOid == InvalidOid) + { + return 0; + } + + Relation pgDistLocalGroupId = table_open(localGroupTableOid, AccessShareLock); + + SysScanDesc scanDescriptor = systable_beginscan(pgDistLocalGroupId, + InvalidOid, false, + NULL, scanKeyCount, scanKey); + + TupleDesc tupleDescriptor = RelationGetDescr(pgDistLocalGroupId); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + + if (HeapTupleIsValid(heapTuple)) + { + bool isNull = false; + Datum groupIdDatum = heap_getattr(heapTuple, + Anum_pg_dist_local_groupid, + tupleDescriptor, &isNull); + + groupId = DatumGetInt32(groupIdDatum); + + /* set the local cache variable */ + LocalGroupId = groupId; + } + else + { + /* + * Upgrade is happening. When upgrading postgres, pg_dist_local_group is + * temporarily empty before citus_finish_pg_upgrade() finishes execution. + */ + groupId = GROUP_ID_UPGRADING; + } + + systable_endscan(scanDescriptor); + table_close(pgDistLocalGroupId, AccessShareLock); + + return groupId; +} + + +/* + * CdcLookupShardRelationFromCatalog returns the logical relation oid a shard belongs to. + * + * Errors out if the shardId does not exist and missingOk is false. + * Returns InvalidOid if the shardId does not exist and missingOk is true. 
+ */ +Oid +CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 1; + Form_pg_dist_shard shardForm = NULL; + Relation pgDistShard = table_open(DistShardRelationId(), AccessShareLock); + Oid relationId = InvalidOid; + + ScanKeyInit(&scanKey[0], Anum_pg_dist_shard_shardid, + BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(shardId)); + + SysScanDesc scanDescriptor = systable_beginscan(pgDistShard, + DistShardShardidIndexId(), true, + NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + if (!HeapTupleIsValid(heapTuple) && !missingOk) + { + ereport(ERROR, (errmsg("could not find valid entry for shard " + UINT64_FORMAT, shardId))); + } + + if (!HeapTupleIsValid(heapTuple)) + { + relationId = InvalidOid; + } + else + { + shardForm = (Form_pg_dist_shard) GETSTRUCT(heapTuple); + relationId = shardForm->logicalrelid; + } + + systable_endscan(scanDescriptor); + table_close(pgDistShard, NoLock); + + return relationId; +} + + +/* + * CdcPgDistPartitionTupleViaCatalog is a helper function that searches + * pg_dist_partition for the given relationId. The caller is responsible + * for ensuring that the returned heap tuple is valid before accessing + * its fields. + */ +static HeapTuple +CdcPgDistPartitionTupleViaCatalog(Oid relationId) +{ + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + bool indexOK = true; + + Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock); + + ScanKeyInit(&scanKey[0], Anum_pg_dist_partition_logicalrelid, + BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId)); + + SysScanDesc scanDescriptor = systable_beginscan(pgDistPartition, + DistPartitionLogicalRelidIndexId(), + indexOK, NULL, scanKeyCount, scanKey); + + HeapTuple partitionTuple = systable_getnext(scanDescriptor); + + if (HeapTupleIsValid(partitionTuple)) + { + /* callers should have the tuple in their memory contexts */ + partitionTuple = heap_copytuple(partitionTuple); + } + + systable_endscan(scanDescriptor); + table_close(pgDistPartition, AccessShareLock); + + return partitionTuple; +} + + +/* + * CdcPartitionMethodViaCatalog gets a relationId and returns the partition + * method column from pg_dist_partition via reading from catalog. + */ +char +CdcPartitionMethodViaCatalog(Oid relationId) +{ + HeapTuple partitionTuple = CdcPgDistPartitionTupleViaCatalog(relationId); + if (!HeapTupleIsValid(partitionTuple)) + { + return DISTRIBUTE_BY_INVALID; + } + + Datum datumArray[Natts_pg_dist_partition]; + bool isNullArray[Natts_pg_dist_partition]; + + Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock); + + TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition); + heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray); + + if (isNullArray[Anum_pg_dist_partition_partmethod - 1]) + { + /* partition method cannot be NULL, still let's make sure */ + heap_freetuple(partitionTuple); + table_close(pgDistPartition, NoLock); + return DISTRIBUTE_BY_INVALID; + } + + Datum partitionMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1]; + char partitionMethodChar = DatumGetChar(partitionMethodDatum); + + heap_freetuple(partitionTuple); + table_close(pgDistPartition, NoLock); + + return partitionMethodChar; +} + + +/* + * RemoveCitusDecodersFromPaths removes a path ending in citus_decoders + * from the given input paths. 
*/ +char * +RemoveCitusDecodersFromPaths(char *paths) +{ + if (strlen(paths) == 0) + { + /* dynamic_library_path is empty */ + return paths; + } + + StringInfo newPaths = makeStringInfo(); + + char *remainingPaths = paths; + + for (;;) + { + int pathLength = 0; + + char *pathStart = first_path_var_separator(remainingPaths); + if (pathStart == remainingPaths) + { + /* + * This will error out in find_in_dynamic_libpath, return + * original value here. + */ + return paths; + } + else if (pathStart == NULL) + { + /* final path */ + pathLength = strlen(remainingPaths); + } + else + { + /* more paths remaining */ + pathLength = pathStart - remainingPaths; + } + + char *currentPath = palloc(pathLength + 1); + strlcpy(currentPath, remainingPaths, pathLength + 1); + canonicalize_path(currentPath); + + if (!pg_str_endswith(currentPath, "/citus_decoders")) + { + appendStringInfo(newPaths, "%s%s", newPaths->len > 0 ? ":" : "", currentPath); + } + + if (remainingPaths[pathLength] == '\0') + { + /* end of string */ + break; + } + + remainingPaths += pathLength + 1; + } + + return newPaths->data; +} diff --git a/src/backend/distributed/cdc/cdc_decoder_utils.h b/src/backend/distributed/cdc/cdc_decoder_utils.h new file mode 100644 index 000000000..d30500de4 --- /dev/null +++ b/src/backend/distributed/cdc/cdc_decoder_utils.h @@ -0,0 +1,34 @@ +/*------------------------------------------------------------------------- + * + * cdc_decoder_utils.h + * Utility functions and declarations for the cdc decoder. + * + * Copyright (c) Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#ifndef CITUS_CDC_DECODER_H +#define CITUS_CDC_DECODER_H + +#include "postgres.h" +#include "fmgr.h" +#include "replication/logical.h" +#include "c.h" + +#define InvalidRepOriginId 0 +#define INVALID_SHARD_ID 0 + +bool CdcIsCoordinator(void); + +uint64 CdcExtractShardIdFromTableName(const char *tableName, bool missingOk); + +Oid CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk); + +char CdcPartitionMethodViaCatalog(Oid relationId); + +bool CdcCitusHasBeenLoaded(void); + +char * RemoveCitusDecodersFromPaths(char *paths); + +#endif /* CITUS_CDC_DECODER_H */ diff --git a/src/backend/distributed/commands/alter_table.c b/src/backend/distributed/commands/alter_table.c index 7f86509cc..5ed82f760 100644 --- a/src/backend/distributed/commands/alter_table.c +++ b/src/backend/distributed/commands/alter_table.c @@ -55,6 +55,7 @@ #include "distributed/multi_partitioning_utils.h" #include "distributed/reference_table_utils.h" #include "distributed/relation_access_tracking.h" +#include "distributed/replication_origin_session_utils.h" #include "distributed/shared_library_init.h" #include "distributed/shard_utils.h" #include "distributed/worker_protocol.h" @@ -183,6 +184,7 @@ static TableConversionReturn * AlterDistributedTable(TableConversionParameters * static TableConversionReturn * AlterTableSetAccessMethod( TableConversionParameters *params); static TableConversionReturn * ConvertTable(TableConversionState *con); +static TableConversionReturn * ConvertTableInternal(TableConversionState *con); static bool SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName, char *longestShardName); static void DropIndexesNotSupportedByColumnar(Oid relationId, @@ -215,7 +217,10 @@ static bool WillRecreateForeignKeyToReferenceTable(Oid relationId, CascadeToColocatedOption cascadeOption); static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId); static
void ErrorIfUnsupportedCascadeObjects(Oid relationId); +static List * WrapTableDDLCommands(List *commandStrings); static bool DoesCascadeDropUnsupportedObject(Oid classId, Oid id, HTAB *nodeMap); +static TableConversionReturn * CopyTableConversionReturnIntoCurrentContext( + TableConversionReturn *tableConversionReturn); PG_FUNCTION_INFO_V1(undistribute_table); PG_FUNCTION_INFO_V1(alter_distributed_table); @@ -402,7 +407,11 @@ UndistributeTable(TableConversionParameters *params) params->conversionType = UNDISTRIBUTE_TABLE; params->shardCountIsNull = true; TableConversionState *con = CreateTableConversion(params); - return ConvertTable(con); + + SetupReplicationOriginLocalSession(); + TableConversionReturn *conv = ConvertTable(con); + ResetReplicationOriginLocalSession(); + return conv; } @@ -441,6 +450,7 @@ AlterDistributedTable(TableConversionParameters *params) ereport(DEBUG1, (errmsg("setting multi shard modify mode to sequential"))); SetLocalMultiShardModifyModeToSequential(); } + return ConvertTable(con); } @@ -511,9 +521,9 @@ AlterTableSetAccessMethod(TableConversionParameters *params) /* - * ConvertTable is used for converting a table into a new table with different properties. - * The conversion is done by creating a new table, moving everything to the new table and - * dropping the old one. So the oid of the table is not preserved. + * ConvertTableInternal is used for converting a table into a new table with different + * properties. The conversion is done by creating a new table, moving everything to the + * new table and dropping the old one. So the oid of the table is not preserved. * * The new table will have the same name, columns and rows. It will also have partitions, * views, sequences of the old table. Finally it will have everything created by @@ -532,7 +542,7 @@ AlterTableSetAccessMethod(TableConversionParameters *params) * in case you add a new way to return from this function. */ TableConversionReturn * -ConvertTable(TableConversionState *con) +ConvertTableInternal(TableConversionState *con) { InTableTypeConversionFunctionCall = true; @@ -595,9 +605,18 @@ ConvertTable(TableConversionState *con) List *justBeforeDropCommands = NIL; List *attachPartitionCommands = NIL; - postLoadCommands = - list_concat(postLoadCommands, - GetViewCreationTableDDLCommandsOfTable(con->relationId)); + List *createViewCommands = GetViewCreationCommandsOfTable(con->relationId); + + postLoadCommands = list_concat(postLoadCommands, + WrapTableDDLCommands(createViewCommands)); + + /* need to add back to publications after dropping the original table */ + bool isAdd = true; + List *alterPublicationCommands = + GetAlterPublicationDDLCommandsForTable(con->relationId, isAdd); + + postLoadCommands = list_concat(postLoadCommands, + WrapTableDDLCommands(alterPublicationCommands)); List *foreignKeyCommands = NIL; if (con->conversionType == ALTER_DISTRIBUTED_TABLE) @@ -800,9 +819,21 @@ ConvertTable(TableConversionState *con) ExecuteQueryViaSPI(tableConstructionSQL, SPI_OK_UTILITY); } + /* + * when there are many partitions, each call to ProcessUtilityParseTree + * accumulates used memory. Free context after each call. 
+ */ + MemoryContext citusPerPartitionContext = + AllocSetContextCreate(CurrentMemoryContext, + "citus_per_partition_context", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldContext = MemoryContextSwitchTo(citusPerPartitionContext); + char *attachPartitionCommand = NULL; foreach_ptr(attachPartitionCommand, attachPartitionCommands) { + MemoryContextReset(citusPerPartitionContext); + Node *parseTree = ParseTreeNode(attachPartitionCommand); ProcessUtilityParseTree(parseTree, attachPartitionCommand, @@ -810,6 +841,9 @@ ConvertTable(TableConversionState *con) NULL, None_Receiver, NULL); } + MemoryContextSwitchTo(oldContext); + MemoryContextDelete(citusPerPartitionContext); + if (isPartitionTable) { ExecuteQueryViaSPI(attachToParentCommand, SPI_OK_UTILITY); @@ -869,10 +903,77 @@ ConvertTable(TableConversionState *con) SetLocalEnableLocalReferenceForeignKeys(oldEnableLocalReferenceForeignKeys); InTableTypeConversionFunctionCall = false; + return ret; } +/* + * CopyTableConversionReturnIntoCurrentContext copies given tableConversionReturn + * into CurrentMemoryContext. + */ +static TableConversionReturn * +CopyTableConversionReturnIntoCurrentContext(TableConversionReturn *tableConversionReturn) +{ + TableConversionReturn *tableConversionReturnCopy = NULL; + if (tableConversionReturn) + { + tableConversionReturnCopy = palloc0(sizeof(TableConversionReturn)); + List *copyForeignKeyCommands = NIL; + char *foreignKeyCommand = NULL; + foreach_ptr(foreignKeyCommand, tableConversionReturn->foreignKeyCommands) + { + char *copyForeignKeyCommand = MemoryContextStrdup(CurrentMemoryContext, + foreignKeyCommand); + copyForeignKeyCommands = lappend(copyForeignKeyCommands, + copyForeignKeyCommand); + } + tableConversionReturnCopy->foreignKeyCommands = copyForeignKeyCommands; + } + + return tableConversionReturnCopy; +} + + +/* + * ConvertTable is a wrapper for ConvertTableInternal to persist only + * TableConversionReturn and delete all other allocations. + */ +static TableConversionReturn * +ConvertTable(TableConversionState *con) +{ + /* + * We do not allow alter_distributed_table and undistribute_table operations + * for tables with identity columns. This is because we do not have a proper way + * of keeping sequence states consistent across the cluster. + */ + ErrorIfTableHasIdentityColumn(con->relationId); + + /* + * when there are many partitions or colocated tables, memory usage is + * accumulated. Free context for each call to ConvertTable. + */ + MemoryContext convertTableContext = + AllocSetContextCreate(CurrentMemoryContext, + "citus_convert_table_context", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldContext = MemoryContextSwitchTo(convertTableContext); + + TableConversionReturn *tableConversionReturn = ConvertTableInternal(con); + + MemoryContextSwitchTo(oldContext); + + /* persist TableConversionReturn in oldContext */ + TableConversionReturn *tableConversionReturnCopy = + CopyTableConversionReturnIntoCurrentContext(tableConversionReturn); + + /* delete convertTableContext */ + MemoryContextDelete(convertTableContext); + + return tableConversionReturnCopy; +} + + /* * DropIndexesNotSupportedByColumnar is a helper function used during accces * method conversion to drop the indexes that are not supported by columnarAM. 
@@ -1268,8 +1369,7 @@ CreateCitusTableLike(TableConversionState *con) } else if (IsCitusTableType(con->relationId, REFERENCE_TABLE)) { - CreateDistributedTable(con->newRelationId, NULL, DISTRIBUTE_BY_NONE, 0, false, - NULL); + CreateReferenceTable(con->newRelationId); } else if (IsCitusTableType(con->relationId, CITUS_LOCAL_TABLE)) { @@ -1410,17 +1510,16 @@ GetViewCreationCommandsOfTable(Oid relationId) /* - * GetViewCreationTableDDLCommandsOfTable is the same as GetViewCreationCommandsOfTable, - * but the returned list includes objects of TableDDLCommand's, not strings. + * WrapTableDDLCommands takes a list of command strings and wraps them + * in TableDDLCommand structs. */ -List * -GetViewCreationTableDDLCommandsOfTable(Oid relationId) +static List * +WrapTableDDLCommands(List *commandStrings) { - List *commands = GetViewCreationCommandsOfTable(relationId); List *tableDDLCommands = NIL; char *command = NULL; - foreach_ptr(command, commands) + foreach_ptr(command, commandStrings) { tableDDLCommands = lappend(tableDDLCommands, makeTableDDLCommandString(command)); } @@ -1523,96 +1622,6 @@ CreateMaterializedViewDDLCommand(Oid matViewOid) } -/* - * This function marks all the identity sequences as distributed on the given table. - */ -static void -MarkIdentitiesAsDistributed(Oid targetRelationId) -{ - Relation relation = relation_open(targetRelationId, AccessShareLock); - TupleDesc tupleDescriptor = RelationGetDescr(relation); - relation_close(relation, NoLock); - - bool missingSequenceOk = false; - - for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts; - attributeIndex++) - { - Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex); - - if (attributeForm->attidentity) - { - Oid seqOid = getIdentitySequence(targetRelationId, attributeForm->attnum, - missingSequenceOk); - - ObjectAddress seqAddress = { 0 }; - ObjectAddressSet(seqAddress, RelationRelationId, seqOid); - MarkObjectDistributed(&seqAddress); - } - } -} - - -/* - * This function returns sql statements to rename identites on the given table - */ -static void -PrepareRenameIdentitiesCommands(Oid sourceRelationId, Oid targetRelationId, - List **outCoordinatorCommands, List **outWorkerCommands) -{ - Relation targetRelation = relation_open(targetRelationId, AccessShareLock); - TupleDesc targetTupleDescriptor = RelationGetDescr(targetRelation); - relation_close(targetRelation, NoLock); - - bool missingSequenceOk = false; - - for (int attributeIndex = 0; attributeIndex < targetTupleDescriptor->natts; - attributeIndex++) - { - Form_pg_attribute attributeForm = TupleDescAttr(targetTupleDescriptor, - attributeIndex); - - if (attributeForm->attidentity) - { - char *columnName = NameStr(attributeForm->attname); - - Oid targetSequenceOid = getIdentitySequence(targetRelationId, - attributeForm->attnum, - missingSequenceOk); - char *targetSequenceName = generate_relation_name(targetSequenceOid, NIL); - - Oid sourceSequenceOid = getIdentitySequence(sourceRelationId, - attributeForm->attnum, - missingSequenceOk); - char *sourceSequenceName = generate_relation_name(sourceSequenceOid, NIL); - - /* to rename sequence on the coordinator */ - *outCoordinatorCommands = lappend(*outCoordinatorCommands, psprintf( - "SET citus.enable_ddl_propagation TO OFF; ALTER SEQUENCE %s RENAME TO %s; RESET citus.enable_ddl_propagation;", - quote_identifier( - targetSequenceName), - quote_identifier( - sourceSequenceName))); - - /* update workers to use existing sequence and drop the new one generated by PG */ - bool 
missingTableOk = true; - *outWorkerCommands = lappend(*outWorkerCommands, - GetAlterColumnWithNextvalDefaultCmd( - sourceSequenceOid, sourceRelationId, - columnName, - missingTableOk)); - - - /* drop the sequence generated by identity column */ - *outWorkerCommands = lappend(*outWorkerCommands, psprintf( - "DROP SEQUENCE IF EXISTS %s", - quote_identifier( - targetSequenceName))); - } - } -} - - /* * ReplaceTable replaces the source table with the target table. * It moves all the rows of the source table to target table with INSERT SELECT. @@ -1671,24 +1680,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands, ExecuteQueryViaSPI(query->data, SPI_OK_INSERT); } - /* - * Drop identity dependencies (sequences marked as DEPENDENCY_INTERNAL) on the workers - * to keep their states after the source table is dropped. - */ - List *ownedIdentitySequences = getOwnedSequences_internal(sourceId, 0, - DEPENDENCY_INTERNAL); - if (ownedIdentitySequences != NIL && ShouldSyncTableMetadata(sourceId)) - { - char *qualifiedTableName = quote_qualified_identifier(schemaName, sourceName); - StringInfo command = makeStringInfo(); - - appendStringInfo(command, - "SELECT pg_catalog.worker_drop_sequence_dependency(%s);", - quote_literal_cstr(qualifiedTableName)); - - SendCommandToWorkersWithMetadata(command->data); - } - /* * Modify regular sequence dependencies (sequences marked as DEPENDENCY_AUTO) */ @@ -1748,23 +1739,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands, quote_qualified_identifier(schemaName, sourceName)))); } - /* - * We need to prepare rename identities commands before dropping the original table, - * otherwise we can't find the original names of the identity sequences. - * We prepare separate commands for the coordinator and the workers because: - * In the coordinator, we simply need to rename the identity sequences - * to their names on the old table, because right now the identity - * sequences have default names generated by Postgres with the creation of the new table - * In the workers, we have not dropped the original identity sequences, - * so what we do is we alter the columns and set their default to the - * original identity sequences, and after that we drop the new sequences. - */ - List *coordinatorCommandsToRenameIdentites = NIL; - List *workerCommandsToRenameIdentites = NIL; - PrepareRenameIdentitiesCommands(sourceId, targetId, - &coordinatorCommandsToRenameIdentites, - &workerCommandsToRenameIdentites); - resetStringInfo(query); appendStringInfo(query, "DROP %sTABLE %s CASCADE", IsForeignTable(sourceId) ? "FOREIGN " : "", @@ -1782,27 +1756,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands, quote_qualified_identifier(schemaName, targetName), quote_identifier(sourceName)); ExecuteQueryViaSPI(query->data, SPI_OK_UTILITY); - - char *coordinatorCommand = NULL; - foreach_ptr(coordinatorCommand, coordinatorCommandsToRenameIdentites) - { - ExecuteQueryViaSPI(coordinatorCommand, SPI_OK_UTILITY); - } - - char *workerCommand = NULL; - foreach_ptr(workerCommand, workerCommandsToRenameIdentites) - { - SendCommandToWorkersWithMetadata(workerCommand); - } - - /* - * To preserve identity sequences states in case of redistributing the table again, - * we don't drop them when we undistribute a table. To maintain consistency and - * avoid future problems if we redistribute the table, we want to apply all changes happening to - * the identity sequence in the coordinator to their corresponding sequences in the workers as well. 
- * That's why we have to mark identity sequences as distributed - */ - MarkIdentitiesAsDistributed(targetId); } diff --git a/src/backend/distributed/commands/citus_add_local_table_to_metadata.c b/src/backend/distributed/commands/citus_add_local_table_to_metadata.c index bb4ab7473..41ec7e1b1 100644 --- a/src/backend/distributed/commands/citus_add_local_table_to_metadata.c +++ b/src/backend/distributed/commands/citus_add_local_table_to_metadata.c @@ -85,6 +85,7 @@ static void DropRelationTruncateTriggers(Oid relationId); static char * GetDropTriggerCommand(Oid relationId, char *triggerName); static void DropViewsOnTable(Oid relationId); static void DropIdentitiesOnTable(Oid relationId); +static void DropTableFromPublications(Oid relationId); static List * GetRenameStatsCommandList(List *statsOidList, uint64 shardId); static List * ReversedOidList(List *oidList); static void AppendExplicitIndexIdsToList(Form_pg_index indexForm, @@ -338,6 +339,10 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve List *shellTableDDLEvents = GetShellTableDDLEventsForCitusLocalTable(relationId); List *tableViewCreationCommands = GetViewCreationCommandsOfTable(relationId); + bool isAdd = true; + List *alterPublicationCommands = + GetAlterPublicationDDLCommandsForTable(relationId, isAdd); + char *relationName = get_rel_name(relationId); Oid relationSchemaId = get_rel_namespace(relationId); @@ -347,6 +352,12 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve */ DropIdentitiesOnTable(relationId); + /* + * We do not want the shard to be in the publication (subscribers are + * unlikely to recognize it). + */ + DropTableFromPublications(relationId); + /* below we convert relation with relationId to the shard relation */ uint64 shardId = ConvertLocalTableToShard(relationId); @@ -363,6 +374,11 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve */ ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(tableViewCreationCommands); + /* + * Execute the publication creation commands with the shell table. + */ + ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(alterPublicationCommands); + /* * Set shellRelationId as the relation with relationId now points * to the shard relation. @@ -1131,7 +1147,7 @@ DropIdentitiesOnTable(Oid relationId) { Relation relation = relation_open(relationId, AccessShareLock); TupleDesc tupleDescriptor = RelationGetDescr(relation); - relation_close(relation, NoLock); + List *dropCommandList = NIL; for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts; attributeIndex++) @@ -1151,15 +1167,38 @@ DropIdentitiesOnTable(Oid relationId) qualifiedTableName, columnName); - /* - * We need to disable/enable ddl propagation for this command, to prevent - * sending unnecessary ALTER COLUMN commands for partitions, to MX workers. - */ - ExecuteAndLogUtilityCommandList(list_make3(DISABLE_DDL_PROPAGATION, - dropCommand->data, - ENABLE_DDL_PROPAGATION)); + dropCommandList = lappend(dropCommandList, dropCommand->data); } } + + relation_close(relation, NoLock); + + char *dropCommand = NULL; + foreach_ptr(dropCommand, dropCommandList) + { + /* + * We need to disable/enable ddl propagation for this command, to prevent + * sending unnecessary ALTER COLUMN commands for partitions, to MX workers. 
+ */ + ExecuteAndLogUtilityCommandList(list_make3(DISABLE_DDL_PROPAGATION, + dropCommand, + ENABLE_DDL_PROPAGATION)); + } +} + + +/* + * DropTableFromPublications drops the table from all of its publications. + */ +static void +DropTableFromPublications(Oid relationId) +{ + bool isAdd = false; + + List *alterPublicationCommands = + GetAlterPublicationDDLCommandsForTable(relationId, isAdd); + + ExecuteAndLogUtilityCommandList(alterPublicationCommands); } diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 0bea11034..a4fb89b87 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -94,6 +94,28 @@ #include "utils/syscache.h" #include "utils/inval.h" + +/* common params that apply to all Citus table types */ +typedef struct +{ + char distributionMethod; + char replicationModel; +} CitusTableParams; + + +/* + * Params that only apply to distributed tables, i.e., the ones that are + * known as DISTRIBUTED_TABLE by Citus metadata. + */ +typedef struct +{ + int shardCount; + bool shardCountIsStrict; + char *colocateWithTableName; + char *distributionColumnName; +} DistributedTableParams; + + /* * once every LOG_PER_TUPLE_AMOUNT, the copy will be logged. */ @@ -106,17 +128,22 @@ static void CreateDistributedTableConcurrently(Oid relationId, char *colocateWithTableName, int shardCount, bool shardCountIsStrict); -static char DecideReplicationModel(char distributionMethod, char *colocateWithTableName); +static char DecideDistTableReplicationModel(char distributionMethod, + char *colocateWithTableName); static List * HashSplitPointsForShardList(List *shardList); static List * HashSplitPointsForShardCount(int shardCount); static List * WorkerNodesForShardList(List *shardList); static List * RoundRobinWorkerNodeList(List *workerNodeList, int listLength); +static CitusTableParams DecideCitusTableParams(CitusTableType tableType, + DistributedTableParams * + distributedTableParams); +static void CreateCitusTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams); static void CreateHashDistributedTableShards(Oid relationId, int shardCount, Oid colocatedTableId, bool localTableEmpty); -static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn, - char distributionMethod, char replicationModel, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName); +static uint32 ColocationIdForNewTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams, + Var *distributionColumn); static void EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn, char distributionMethod, uint32 colocationId, char replicationModel); @@ -377,8 +404,8 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName, EnsureForeignKeysForDistributedTableConcurrently(relationId); - char replicationModel = DecideReplicationModel(distributionMethod, - colocateWithTableName); + char replicationModel = DecideDistTableReplicationModel(distributionMethod, + colocateWithTableName); /* * we fail transaction before local table conversion if the table could not be colocated with @@ -622,8 +649,8 @@ static void EnsureColocateWithTableIsValid(Oid relationId, char distributionMethod, char *distributionColumnName, char *colocateWithTableName) { - char replicationModel = DecideReplicationModel(distributionMethod, - colocateWithTableName); 
+ char replicationModel = DecideDistTableReplicationModel(distributionMethod, + colocateWithTableName); /* * we fail transaction before local table conversion if the table could not be colocated with @@ -860,9 +887,6 @@ create_reference_table(PG_FUNCTION_ARGS) CheckCitusVersion(ERROR); Oid relationId = PG_GETARG_OID(0); - char *colocateWithTableName = NULL; - char *distributionColumnName = NULL; - EnsureCitusTableCanBeCreated(relationId); /* enable create_reference_table on an empty node */ @@ -895,8 +919,7 @@ create_reference_table(PG_FUNCTION_ARGS) errdetail("There are no active worker nodes."))); } - CreateDistributedTable(relationId, distributionColumnName, DISTRIBUTE_BY_NONE, - ShardCount, false, colocateWithTableName); + CreateReferenceTable(relationId); PG_RETURN_VOID(); } @@ -951,18 +974,90 @@ EnsureRelationExists(Oid relationId) /* - * CreateDistributedTable creates distributed table in the given configuration. + * CreateReferenceTable is a wrapper around CreateCitusTable that creates a + * distributed table. + */ +void +CreateDistributedTable(Oid relationId, char *distributionColumnName, + char distributionMethod, + int shardCount, bool shardCountIsStrict, + char *colocateWithTableName) +{ + CitusTableType tableType; + switch (distributionMethod) + { + case DISTRIBUTE_BY_HASH: + { + tableType = HASH_DISTRIBUTED; + break; + } + + case DISTRIBUTE_BY_APPEND: + { + tableType = APPEND_DISTRIBUTED; + break; + } + + case DISTRIBUTE_BY_RANGE: + { + tableType = RANGE_DISTRIBUTED; + break; + } + + default: + { + ereport(ERROR, (errmsg("unexpected distribution method when " + "deciding Citus table type"))); + break; + } + } + + DistributedTableParams distributedTableParams = { + .colocateWithTableName = colocateWithTableName, + .shardCount = shardCount, + .shardCountIsStrict = shardCountIsStrict, + .distributionColumnName = distributionColumnName + }; + CreateCitusTable(relationId, tableType, &distributedTableParams); +} + + +/* + * CreateReferenceTable is a wrapper around CreateCitusTable that creates a + * reference table. + */ +void +CreateReferenceTable(Oid relationId) +{ + CreateCitusTable(relationId, REFERENCE_TABLE, NULL); +} + + +/* + * CreateCitusTable is the internal method that creates a Citus table in + * given configuration. + * + * DistributedTableParams should be non-null only if we're creating a distributed + * table. + * * This functions contains all necessary logic to create distributed tables. It * performs necessary checks to ensure distributing the table is safe. If it is * safe to distribute the table, this function creates distributed table metadata, * creates shards and copies local data to shards. This function also handles * partitioned tables by distributing its partitions as well. 
*/ -void -CreateDistributedTable(Oid relationId, char *distributionColumnName, - char distributionMethod, int shardCount, - bool shardCountIsStrict, char *colocateWithTableName) +static void +CreateCitusTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams) { + if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED || + tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL)) + { + ereport(ERROR, (errmsg("distributed table params must be provided " + "when creating a distributed table and must " + "not be otherwise"))); + } + /* * EnsureTableNotDistributed errors out when relation is a citus table but * we don't want to ask user to first undistribute their citus local tables @@ -988,11 +1083,8 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, * that ALTER TABLE hook does the necessary job, which means converting * local tables to citus local tables to properly support such foreign * keys. - * - * This function does not expect to create Citus local table, so we blindly - * create reference table when the method is DISTRIBUTE_BY_NONE. */ - else if (distributionMethod == DISTRIBUTE_BY_NONE && + else if (tableType == REFERENCE_TABLE && ShouldEnableLocalReferenceForeignKeys() && HasForeignKeyWithLocalTable(relationId)) { @@ -1022,24 +1114,29 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, PropagatePrerequisiteObjectsForDistributedTable(relationId); - char replicationModel = DecideReplicationModel(distributionMethod, - colocateWithTableName); + Var *distributionColumn = NULL; + if (distributedTableParams) + { + distributionColumn = BuildDistributionKeyFromColumnName(relationId, + distributedTableParams-> + distributionColumnName, + NoLock); + } - Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId, - distributionColumnName, - NoLock); + CitusTableParams citusTableParams = DecideCitusTableParams(tableType, + distributedTableParams); /* * ColocationIdForNewTable assumes caller acquires lock on relationId. In our case, * our caller already acquired lock on relationId. 
*/ - uint32 colocationId = ColocationIdForNewTable(relationId, distributionColumn, - distributionMethod, replicationModel, - shardCount, shardCountIsStrict, - colocateWithTableName); + uint32 colocationId = ColocationIdForNewTable(relationId, tableType, + distributedTableParams, + distributionColumn); - EnsureRelationCanBeDistributed(relationId, distributionColumn, distributionMethod, - colocationId, replicationModel); + EnsureRelationCanBeDistributed(relationId, distributionColumn, + citusTableParams.distributionMethod, + colocationId, citusTableParams.replicationModel); /* * Make sure that existing reference tables have been replicated to all the nodes @@ -1068,8 +1165,10 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, bool autoConverted = false; /* create an entry for distributed table in pg_dist_partition */ - InsertIntoPgDistPartition(relationId, distributionMethod, distributionColumn, - colocationId, replicationModel, autoConverted); + InsertIntoPgDistPartition(relationId, citusTableParams.distributionMethod, + distributionColumn, + colocationId, citusTableParams.replicationModel, + autoConverted); /* foreign tables do not support TRUNCATE trigger */ if (RegularTable(relationId)) @@ -1078,17 +1177,14 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, } /* create shards for hash distributed and reference tables */ - if (distributionMethod == DISTRIBUTE_BY_HASH) + if (tableType == HASH_DISTRIBUTED) { - CreateHashDistributedTableShards(relationId, shardCount, colocatedTableId, + CreateHashDistributedTableShards(relationId, distributedTableParams->shardCount, + colocatedTableId, localTableEmpty); } - else if (distributionMethod == DISTRIBUTE_BY_NONE) + else if (tableType == REFERENCE_TABLE) { - /* - * This function does not expect to create Citus local table, so we blindly - * create reference table when the method is DISTRIBUTE_BY_NONE. - */ CreateReferenceTableShard(relationId); } @@ -1116,17 +1212,36 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, char *relationName = get_rel_name(relationId); char *parentRelationName = quote_qualified_identifier(schemaName, relationName); + /* + * when there are many partitions, each call to CreateDistributedTable + * accumulates used memory. Create and free context for each call. 
+ */ + MemoryContext citusPartitionContext = + AllocSetContextCreate(CurrentMemoryContext, + "citus_per_partition_context", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldContext = MemoryContextSwitchTo(citusPartitionContext); + foreach_oid(partitionRelationId, partitionList) { - CreateDistributedTable(partitionRelationId, distributionColumnName, - distributionMethod, shardCount, false, - parentRelationName); + MemoryContextReset(citusPartitionContext); + + DistributedTableParams childDistributedTableParams = { + .colocateWithTableName = parentRelationName, + .shardCount = distributedTableParams->shardCount, + .shardCountIsStrict = false, + .distributionColumnName = distributedTableParams->distributionColumnName, + }; + CreateCitusTable(partitionRelationId, tableType, + &childDistributedTableParams); } + + MemoryContextSwitchTo(oldContext); + MemoryContextDelete(citusPartitionContext); } /* copy over data for hash distributed and reference tables */ - if (distributionMethod == DISTRIBUTE_BY_HASH || - distributionMethod == DISTRIBUTE_BY_NONE) + if (tableType == HASH_DISTRIBUTED || tableType == REFERENCE_TABLE) { if (RegularTable(relationId)) { @@ -1145,6 +1260,70 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName, } +/* + * DecideCitusTableParams decides CitusTableParams based on given CitusTableType + * and DistributedTableParams if it's a distributed table. + * + * DistributedTableParams should be non-null only if CitusTableType corresponds + * to a distributed table. + */ +static +CitusTableParams +DecideCitusTableParams(CitusTableType tableType, + DistributedTableParams *distributedTableParams) +{ + CitusTableParams citusTableParams = { 0 }; + switch (tableType) + { + case HASH_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_HASH; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(DISTRIBUTE_BY_HASH, + distributedTableParams-> + colocateWithTableName); + break; + } + + case APPEND_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_APPEND; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(APPEND_DISTRIBUTED, + distributedTableParams-> + colocateWithTableName); + break; + } + + case RANGE_DISTRIBUTED: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_RANGE; + citusTableParams.replicationModel = + DecideDistTableReplicationModel(RANGE_DISTRIBUTED, + distributedTableParams-> + colocateWithTableName); + break; + } + + case REFERENCE_TABLE: + { + citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE; + citusTableParams.replicationModel = REPLICATION_MODEL_2PC; + break; + } + + default: + { + ereport(ERROR, (errmsg("unexpected table type when deciding Citus " + "table params"))); + break; + } + } + + return citusTableParams; +} + + /* * PropagatePrerequisiteObjectsForDistributedTable ensures we can create shards * on all nodes by ensuring all dependent objects exist on all node. 
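After this refactor, reference tables and distributed tables have separate public entry points (CreateReferenceTable and CreateDistributedTable) that both funnel into CreateCitusTable, with DecideCitusTableParams pinning the distribution method and replication model per table type. The following is a sketch of what a caller looks like against the signatures shown in this patch; the wrapper name, the hash-distribution defaults, and the "default" colocation literal are illustrative assumptions rather than code from the patch.

#include "postgres.h"

/*
 * The Citus declarations used below (IsCitusTableType, REFERENCE_TABLE,
 * DISTRIBUTE_BY_HASH, ShardCount, CreateDistributedTable and
 * CreateReferenceTable) come from the extension's own headers.
 */
static void
CreateTableLikeSketch(Oid sourceRelationId, Oid newRelationId,
                      char *distributionColumnName)
{
    if (IsCitusTableType(sourceRelationId, REFERENCE_TABLE))
    {
        /* reference tables no longer go through CreateDistributedTable */
        CreateReferenceTable(newRelationId);
    }
    else
    {
        /* hash-distribute with the default shard count and colocation group */
        bool shardCountIsStrict = false;
        CreateDistributedTable(newRelationId, distributionColumnName,
                               DISTRIBUTE_BY_HASH, ShardCount,
                               shardCountIsStrict, "default");
    }
}

Callers that previously passed DISTRIBUTE_BY_NONE to CreateDistributedTable (create_reference_table and CreateCitusTableLike in this patch) now call CreateReferenceTable instead, and DecideCitusTableParams is the single place that maps REFERENCE_TABLE to DISTRIBUTE_BY_NONE with REPLICATION_MODEL_2PC.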
@@ -1190,7 +1369,7 @@ EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId, Oid ownerRelationId foreach_oid(citusTableId, citusTableIdList) { List *seqInfoList = NIL; - GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0); + GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, DEPENDENCY_AUTO); SequenceInfo *seqInfo = NULL; foreach_ptr(seqInfo, seqInfoList) @@ -1267,7 +1446,7 @@ EnsureRelationHasCompatibleSequenceTypes(Oid relationId) { List *seqInfoList = NIL; - GetDependentSequencesWithRelation(relationId, &seqInfoList, 0); + GetDependentSequencesWithRelation(relationId, &seqInfoList, 0, DEPENDENCY_AUTO); EnsureDistributedSequencesHaveOneType(relationId, seqInfoList); } @@ -1405,18 +1584,16 @@ DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag) /* - * DecideReplicationModel function decides which replication model should be - * used depending on given distribution configuration. + * DecideDistTableReplicationModel function decides which replication model should be + * used for a distributed table depending on given distribution configuration. */ static char -DecideReplicationModel(char distributionMethod, char *colocateWithTableName) +DecideDistTableReplicationModel(char distributionMethod, char *colocateWithTableName) { - if (distributionMethod == DISTRIBUTE_BY_NONE) - { - return REPLICATION_MODEL_2PC; - } - else if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 && - !IsColocateWithNone(colocateWithTableName)) + Assert(distributionMethod != DISTRIBUTE_BY_NONE); + + if (!IsColocateWithDefault(colocateWithTableName) && + !IsColocateWithNone(colocateWithTableName)) { text *colocateWithTableNameText = cstring_to_text(colocateWithTableName); Oid colocatedRelationId = ResolveRelationId(colocateWithTableNameText, false); @@ -1491,28 +1668,34 @@ CreateHashDistributedTableShards(Oid relationId, int shardCount, /* - * ColocationIdForNewTable returns a colocation id for hash-distributed table + * ColocationIdForNewTable returns a colocation id for given table * according to given configuration. If there is no such configuration, it * creates one and returns colocation id of newly the created colocation group. + * Note that DistributedTableParams and the distribution column Var should be + * non-null only if CitusTableType corresponds to a distributed table. + * * For append and range distributed tables, this function errors out if * colocateWithTableName parameter is not NULL, otherwise directly returns * INVALID_COLOCATION_ID. * + * For reference tables, returns the common reference table colocation id. + * * This function assumes its caller take necessary lock on relationId to * prevent possible changes on it. 
*/ static uint32 -ColocationIdForNewTable(Oid relationId, Var *distributionColumn, - char distributionMethod, char replicationModel, - int shardCount, bool shardCountIsStrict, - char *colocateWithTableName) +ColocationIdForNewTable(Oid relationId, CitusTableType tableType, + DistributedTableParams *distributedTableParams, + Var *distributionColumn) { + CitusTableParams citusTableParams = DecideCitusTableParams(tableType, + distributedTableParams); + uint32 colocationId = INVALID_COLOCATION_ID; - if (distributionMethod == DISTRIBUTE_BY_APPEND || - distributionMethod == DISTRIBUTE_BY_RANGE) + if (tableType == APPEND_DISTRIBUTED || tableType == RANGE_DISTRIBUTED) { - if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0) + if (!IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot distribute relation"), @@ -1522,7 +1705,7 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, return colocationId; } - else if (distributionMethod == DISTRIBUTE_BY_NONE) + else if (tableType == REFERENCE_TABLE) { return CreateReferenceTableColocationId(); } @@ -1533,27 +1716,29 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, * can be sure that there will no modifications on the colocation table * until this transaction is committed. */ - Assert(distributionMethod == DISTRIBUTE_BY_HASH); + Assert(citusTableParams.distributionMethod == DISTRIBUTE_BY_HASH); Oid distributionColumnType = distributionColumn->vartype; Oid distributionColumnCollation = get_typcollation(distributionColumnType); /* get an advisory lock to serialize concurrent default group creations */ - if (IsColocateWithDefault(colocateWithTableName)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { AcquireColocationDefaultLock(); } colocationId = FindColocateWithColocationId(relationId, - replicationModel, + citusTableParams.replicationModel, distributionColumnType, distributionColumnCollation, - shardCount, + distributedTableParams->shardCount, + distributedTableParams-> shardCountIsStrict, + distributedTableParams-> colocateWithTableName); - if (IsColocateWithDefault(colocateWithTableName) && (colocationId != - INVALID_COLOCATION_ID)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName) && + (colocationId != INVALID_COLOCATION_ID)) { /* * we can release advisory lock if there is already a default entry for given params; @@ -1565,23 +1750,25 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn, if (colocationId == INVALID_COLOCATION_ID) { - if (IsColocateWithDefault(colocateWithTableName)) + if (IsColocateWithDefault(distributedTableParams->colocateWithTableName)) { /* * Generate a new colocation ID and insert a pg_dist_colocation * record. */ - colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor, + colocationId = CreateColocationGroup(distributedTableParams->shardCount, + ShardReplicationFactor, distributionColumnType, distributionColumnCollation); } - else if (IsColocateWithNone(colocateWithTableName)) + else if (IsColocateWithNone(distributedTableParams->colocateWithTableName)) { /* * Generate a new colocation ID and insert a pg_dist_colocation * record. 
*/ - colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor, + colocationId = CreateColocationGroup(distributedTableParams->shardCount, + ShardReplicationFactor, distributionColumnType, distributionColumnCollation); } @@ -1608,6 +1795,8 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn, { Oid parentRelationId = InvalidOid; + ErrorIfTableHasUnsupportedIdentityColumn(relationId); + EnsureLocalTableEmptyIfNecessary(relationId, distributionMethod); /* user really wants triggers? */ @@ -2219,12 +2408,12 @@ CopyLocalDataIntoShards(Oid distributedRelationId) EState *estate = CreateExecutorState(); ExprContext *econtext = GetPerTupleExprContext(estate); econtext->ecxt_scantuple = slot; - + const bool nonPublishableData = false; DestReceiver *copyDest = (DestReceiver *) CreateCitusCopyDestReceiver(distributedRelationId, columnNameList, partitionColumnIndex, - estate, NULL); + estate, NULL, nonPublishableData); /* initialise state for writing to shards, we'll open connections on demand */ copyDest->rStartup(copyDest, 0, tupleDescriptor); diff --git a/src/backend/distributed/commands/dependencies.c b/src/backend/distributed/commands/dependencies.c index 735449973..baa5082d7 100644 --- a/src/backend/distributed/commands/dependencies.c +++ b/src/backend/distributed/commands/dependencies.c @@ -29,16 +29,14 @@ #include "storage/lmgr.h" #include "utils/lsyscache.h" -typedef bool (*AddressPredicate)(const ObjectAddress *); static void EnsureDependenciesCanBeDistributed(const ObjectAddress *relationAddress); static void ErrorIfCircularDependencyExists(const ObjectAddress *objectAddress); static int ObjectAddressComparator(const void *a, const void *b); -static List * FilterObjectAddressListByPredicate(List *objectAddressList, - AddressPredicate predicate); static void EnsureDependenciesExistOnAllNodes(const ObjectAddress *target); static List * GetDependencyCreateDDLCommands(const ObjectAddress *dependency); static bool ShouldPropagateObject(const ObjectAddress *address); +static char * DropTableIfExistsCommand(Oid relationId); /* * EnsureDependenciesExistOnAllNodes finds all the dependencies that we support and makes @@ -325,6 +323,21 @@ GetDistributableDependenciesForObject(const ObjectAddress *target) } +/* + * DropTableIfExistsCommand returns command to drop given table if exists. + */ +static char * +DropTableIfExistsCommand(Oid relationId) +{ + char *qualifiedRelationName = generate_qualified_relation_name(relationId); + StringInfo dropTableCommand = makeStringInfo(); + appendStringInfo(dropTableCommand, "DROP TABLE IF EXISTS %s CASCADE", + qualifiedRelationName); + + return dropTableCommand->data; +} + + /* * GetDependencyCreateDDLCommands returns a list (potentially empty or NIL) of ddl * commands to execute on a worker to create the object. 
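DropTableIfExistsCommand builds its DDL with the usual StringInfo buffer plus quoting helpers, the same combination this patch uses in AlterPublicationOwnerCommand and that ReplaceTable used for the removed worker_drop_sequence_dependency call. A minimal sketch of the idiom follows; the DROP PUBLICATION statement and the function name are illustrative, not part of the patch.

#include "postgres.h"

#include "lib/stringinfo.h"
#include "utils/builtins.h"    /* quote_identifier */

/*
 * Sketch: build a quoted DDL string into a palloc'd StringInfo buffer and
 * return the buffer's data pointer to the caller.
 */
static char *
BuildDropPublicationIfExistsCommand(const char *publicationName)
{
    StringInfo command = makeStringInfo();

    appendStringInfo(command, "DROP PUBLICATION IF EXISTS %s",
                     quote_identifier(publicationName));

    return command->data;
}

Since makeStringInfo allocates both the struct and its data buffer in CurrentMemoryContext, returning command->data is safe for as long as the calling context lives, which is why these helpers can append the string straight onto a command list.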
@@ -370,7 +383,7 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency) bool creatingShellTableOnRemoteNode = true; List *tableDDLCommands = GetFullTableCreationCommands(relationId, WORKER_NEXTVAL_SEQUENCE_DEFAULTS, - INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, + INCLUDE_IDENTITY, creatingShellTableOnRemoteNode); TableDDLCommand *tableDDLCommand = NULL; foreach_ptr(tableDDLCommand, tableDDLCommands) @@ -379,6 +392,10 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency) commandList = lappend(commandList, GetTableDDLCommand( tableDDLCommand)); } + + /* we need to drop table, if exists, first to make table creation idempotent */ + commandList = lcons(DropTableIfExistsCommand(relationId), + commandList); } return commandList; @@ -438,6 +455,11 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency) return DDLCommands; } + case OCLASS_PUBLICATION: + { + return CreatePublicationDDLCommandsIdempotent(dependency); + } + case OCLASS_ROLE: { return GenerateCreateOrAlterRoleCommand(dependency->objectId); @@ -527,68 +549,6 @@ GetAllDependencyCreateDDLCommands(const List *dependencies) } -/* - * ReplicateAllObjectsToNodeCommandList returns commands to replicate all - * previously marked objects to a worker node. The function also sets - * clusterHasDistributedFunction if there are any distributed functions. - */ -List * -ReplicateAllObjectsToNodeCommandList(const char *nodeName, int nodePort) -{ - /* since we are executing ddl commands disable propagation first, primarily for mx */ - List *ddlCommands = list_make1(DISABLE_DDL_PROPAGATION); - - /* - * collect all dependencies in creation order and get their ddl commands - */ - List *dependencies = GetDistributedObjectAddressList(); - - /* - * Depending on changes in the environment, such as the enable_metadata_sync guc - * there might be objects in the distributed object address list that should currently - * not be propagated by citus as they are 'not supported'. - */ - dependencies = FilterObjectAddressListByPredicate(dependencies, - &SupportedDependencyByCitus); - - /* - * When dependency lists are getting longer we see a delay in the creation time on the - * workers. We would like to inform the user. Currently we warn for lists greater than - * 100 items, where 100 is an arbitrarily chosen number. If we find it too high or too - * low we can adjust this based on experience. - */ - if (list_length(dependencies) > 100) - { - ereport(NOTICE, (errmsg("Replicating postgres objects to node %s:%d", nodeName, - nodePort), - errdetail("There are %d objects to replicate, depending on your " - "environment this might take a while", - list_length(dependencies)))); - } - - dependencies = OrderObjectAddressListInDependencyOrder(dependencies); - ObjectAddress *dependency = NULL; - foreach_ptr(dependency, dependencies) - { - if (IsAnyObjectAddressOwnedByExtension(list_make1(dependency), NULL)) - { - /* - * we expect extension-owned objects to be created as a result - * of the extension being created. - */ - continue; - } - - ddlCommands = list_concat(ddlCommands, - GetDependencyCreateDDLCommands(dependency)); - } - - ddlCommands = lappend(ddlCommands, ENABLE_DDL_PROPAGATION); - - return ddlCommands; -} - - /* * ShouldPropagate determines if we should be propagating anything */ @@ -744,7 +704,7 @@ ShouldPropagateAnyObject(List *addresses) * FilterObjectAddressListByPredicate takes a list of ObjectAddress *'s and returns a list * only containing the ObjectAddress *'s for which the predicate returned true. 
*/ -static List * +List * FilterObjectAddressListByPredicate(List *objectAddressList, AddressPredicate predicate) { List *result = NIL; diff --git a/src/backend/distributed/commands/distribute_object_ops.c b/src/backend/distributed/commands/distribute_object_ops.c index 3f30eaaa2..017cb6537 100644 --- a/src/backend/distributed/commands/distribute_object_ops.c +++ b/src/backend/distributed/commands/distribute_object_ops.c @@ -245,6 +245,15 @@ static DistributeObjectOps Any_CreatePolicy = { .address = NULL, .markDistributed = false, }; +static DistributeObjectOps Any_CreatePublication = { + .deparse = DeparseCreatePublicationStmt, + .qualify = QualifyCreatePublicationStmt, + .preprocess = NULL, + .postprocess = PostProcessCreatePublicationStmt, + .operationType = DIST_OPS_CREATE, + .address = CreatePublicationStmtObjectAddress, + .markDistributed = true, +}; static DistributeObjectOps Any_CreateRole = { .deparse = DeparseCreateRoleStmt, .qualify = NULL, @@ -707,6 +716,45 @@ static DistributeObjectOps Procedure_Rename = { .address = RenameFunctionStmtObjectAddress, .markDistributed = false, }; +static DistributeObjectOps Publication_Alter = { + .deparse = DeparseAlterPublicationStmt, + .qualify = QualifyAlterPublicationStmt, + .preprocess = PreprocessAlterPublicationStmt, + .postprocess = PostprocessAlterDistributedObjectStmt, + .objectType = OBJECT_PUBLICATION, + .operationType = DIST_OPS_ALTER, + .address = AlterPublicationStmtObjectAddress, + .markDistributed = false, +}; +static DistributeObjectOps Publication_AlterOwner = { + .deparse = DeparseAlterPublicationOwnerStmt, + .qualify = NULL, + .preprocess = PreprocessAlterDistributedObjectStmt, + .postprocess = PostprocessAlterDistributedObjectStmt, + .objectType = OBJECT_PUBLICATION, + .operationType = DIST_OPS_ALTER, + .address = AlterPublicationOwnerStmtObjectAddress, + .markDistributed = false, +}; +static DistributeObjectOps Publication_Drop = { + .deparse = DeparseDropPublicationStmt, + .qualify = NULL, + .preprocess = PreprocessDropDistributedObjectStmt, + .postprocess = NULL, + .operationType = DIST_OPS_DROP, + .address = NULL, + .markDistributed = false, +}; +static DistributeObjectOps Publication_Rename = { + .deparse = DeparseRenamePublicationStmt, + .qualify = NULL, + .preprocess = PreprocessAlterDistributedObjectStmt, + .postprocess = NULL, + .objectType = OBJECT_PUBLICATION, + .operationType = DIST_OPS_ALTER, + .address = RenamePublicationStmtObjectAddress, + .markDistributed = false, +}; static DistributeObjectOps Routine_AlterObjectDepends = { .deparse = DeparseAlterFunctionDependsStmt, .qualify = QualifyAlterFunctionDependsStmt, @@ -1399,6 +1447,11 @@ GetDistributeObjectOps(Node *node) return &Procedure_AlterOwner; } + case OBJECT_PUBLICATION: + { + return &Publication_AlterOwner; + } + case OBJECT_ROUTINE: { return &Routine_AlterOwner; @@ -1436,6 +1489,11 @@ GetDistributeObjectOps(Node *node) return &Any_AlterPolicy; } + case T_AlterPublicationStmt: + { + return &Publication_Alter; + } + case T_AlterRoleStmt: { return &Any_AlterRole; @@ -1610,6 +1668,11 @@ GetDistributeObjectOps(Node *node) return &Any_CreatePolicy; } + case T_CreatePublicationStmt: + { + return &Any_CreatePublication; + } + case T_CreateRoleStmt: { return &Any_CreateRole; @@ -1722,6 +1785,11 @@ GetDistributeObjectOps(Node *node) return &Procedure_Drop; } + case OBJECT_PUBLICATION: + { + return &Publication_Drop; + } + case OBJECT_ROUTINE: { return &Routine_Drop; @@ -1901,6 +1969,11 @@ GetDistributeObjectOps(Node *node) return &Procedure_Rename; } + case 
OBJECT_PUBLICATION: + { + return &Publication_Rename; + } + case OBJECT_ROUTINE: { return &Routine_Rename; diff --git a/src/backend/distributed/commands/foreign_constraint.c b/src/backend/distributed/commands/foreign_constraint.c index cf1e43fd4..6f12db13f 100644 --- a/src/backend/distributed/commands/foreign_constraint.c +++ b/src/backend/distributed/commands/foreign_constraint.c @@ -221,7 +221,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis if (!referencedIsCitus && !selfReferencingTable) { if (IsCitusLocalTableByDistParams(referencingDistMethod, - referencingReplicationModel)) + referencingReplicationModel, + referencingColocationId)) { ErrorOutForFKeyBetweenPostgresAndCitusLocalTable(referencedTableId); } @@ -245,8 +246,7 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis if (!selfReferencingTable) { referencedDistMethod = PartitionMethod(referencedTableId); - referencedDistKey = IsCitusTableType(referencedTableId, - CITUS_TABLE_WITH_NO_DIST_KEY) ? + referencedDistKey = !HasDistributionKey(referencedTableId) ? NULL : DistPartitionKey(referencedTableId); referencedColocationId = TableColocationId(referencedTableId); @@ -278,9 +278,17 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis } bool referencingIsCitusLocalOrRefTable = - (referencingDistMethod == DISTRIBUTE_BY_NONE); + IsCitusLocalTableByDistParams(referencingDistMethod, + referencingReplicationModel, + referencingColocationId) || + IsReferenceTableByDistParams(referencingDistMethod, + referencingReplicationModel); bool referencedIsCitusLocalOrRefTable = - (referencedDistMethod == DISTRIBUTE_BY_NONE); + IsCitusLocalTableByDistParams(referencedDistMethod, + referencedReplicationModel, + referencedColocationId) || + IsReferenceTableByDistParams(referencedDistMethod, + referencedReplicationModel); if (referencingIsCitusLocalOrRefTable && referencedIsCitusLocalOrRefTable) { EnsureSupportedFKeyBetweenCitusLocalAndRefTable(constraintForm, @@ -313,7 +321,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis * reference table is referenced. */ bool referencedIsReferenceTable = - (referencedReplicationModel == REPLICATION_MODEL_2PC); + IsReferenceTableByDistParams(referencedDistMethod, + referencedReplicationModel); if (!referencedIsReferenceTable && ( referencingColocationId == INVALID_COLOCATION_ID || referencingColocationId != referencedColocationId)) diff --git a/src/backend/distributed/commands/index.c b/src/backend/distributed/commands/index.c index 5f1598510..aa0715372 100644 --- a/src/backend/distributed/commands/index.c +++ b/src/backend/distributed/commands/index.c @@ -1190,7 +1190,7 @@ ErrorIfUnsupportedIndexStmt(IndexStmt *createIndexStatement) * Non-distributed tables do not have partition key, and unique constraints * are allowed for them. Thus, we added a short-circuit for non-distributed tables. 
*/ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { return; } diff --git a/src/backend/distributed/commands/local_multi_copy.c b/src/backend/distributed/commands/local_multi_copy.c index fbfce7119..5cf01baf4 100644 --- a/src/backend/distributed/commands/local_multi_copy.c +++ b/src/backend/distributed/commands/local_multi_copy.c @@ -36,6 +36,7 @@ #include "distributed/local_multi_copy.h" #include "distributed/shard_utils.h" #include "distributed/version_compat.h" +#include "distributed/replication_origin_session_utils.h" /* managed via GUC, default is 512 kB */ int LocalCopyFlushThresholdByte = 512 * 1024; @@ -46,7 +47,7 @@ static void AddSlotToBuffer(TupleTableSlot *slot, CitusCopyDestReceiver *copyDes static bool ShouldAddBinaryHeaders(StringInfo buffer, bool isBinary); static bool ShouldSendCopyNow(StringInfo buffer); static void DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, - CopyStmt *copyStatement, bool isEndOfCopy); + CopyStmt *copyStatement, bool isEndOfCopy, bool isPublishable); static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead); @@ -94,7 +95,7 @@ WriteTupleToLocalShard(TupleTableSlot *slot, CitusCopyDestReceiver *copyDest, in bool isEndOfCopy = false; DoLocalCopy(localCopyOutState->fe_msgbuf, copyDest->distributedRelationId, shardId, - copyDest->copyStatement, isEndOfCopy); + copyDest->copyStatement, isEndOfCopy, copyDest->isPublishable); resetStringInfo(localCopyOutState->fe_msgbuf); } } @@ -133,7 +134,7 @@ FinishLocalCopyToShard(CitusCopyDestReceiver *copyDest, int64 shardId, } bool isEndOfCopy = true; DoLocalCopy(localCopyOutState->fe_msgbuf, copyDest->distributedRelationId, shardId, - copyDest->copyStatement, isEndOfCopy); + copyDest->copyStatement, isEndOfCopy, copyDest->isPublishable); } @@ -197,7 +198,7 @@ ShouldSendCopyNow(StringInfo buffer) */ static void DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStatement, - bool isEndOfCopy) + bool isEndOfCopy, bool isPublishable) { /* * Set the buffer as a global variable to allow ReadFromLocalBufferCallback @@ -205,6 +206,10 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat * ReadFromLocalBufferCallback. 
*/ LocalCopyBuffer = buffer; + if (!isPublishable) + { + SetupReplicationOriginLocalSession(); + } Oid shardOid = GetTableLocalShardOid(relationId, shardId); Relation shard = table_open(shardOid, RowExclusiveLock); @@ -219,6 +224,10 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat EndCopyFrom(cstate); table_close(shard, NoLock); + if (!isPublishable) + { + ResetReplicationOriginLocalSession(); + } free_parsestate(pState); } diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index b5ac6a519..6e3d19b68 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -85,6 +85,7 @@ #include "distributed/relation_access_tracking.h" #include "distributed/remote_commands.h" #include "distributed/remote_transaction.h" +#include "distributed/replication_origin_session_utils.h" #include "distributed/resource_lock.h" #include "distributed/shard_pruning.h" #include "distributed/shared_connection_stats.h" @@ -270,7 +271,8 @@ static CopyConnectionState * GetConnectionState(HTAB *connectionStateHash, static CopyShardState * GetShardState(uint64 shardId, HTAB *shardStateHash, HTAB *connectionStateHash, bool *found, bool shouldUseLocalCopy, CopyOutState - copyOutState, bool isColocatedIntermediateResult); + copyOutState, bool isColocatedIntermediateResult, + bool isPublishable); static MultiConnection * CopyGetPlacementConnection(HTAB *connectionStateHash, ShardPlacement *placement, bool colocatedIntermediateResult); @@ -285,7 +287,8 @@ static void InitializeCopyShardState(CopyShardState *shardState, uint64 shardId, bool canUseLocalCopy, CopyOutState copyOutState, - bool colocatedIntermediateResult); + bool colocatedIntermediateResult, bool + isPublishable); static void StartPlacementStateCopyCommand(CopyPlacementState *placementState, CopyStmt *copyStatement, CopyOutState copyOutState); @@ -393,7 +396,7 @@ CitusCopyFrom(CopyStmt *copyStatement, QueryCompletion *completionTag) if (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) || IsCitusTableTypeCacheEntry(cacheEntry, RANGE_DISTRIBUTED) || IsCitusTableTypeCacheEntry(cacheEntry, APPEND_DISTRIBUTED) || - IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + !HasDistributionKeyCacheEntry(cacheEntry)) { CopyToExistingShards(copyStatement, completionTag); } @@ -492,9 +495,11 @@ CopyToExistingShards(CopyStmt *copyStatement, QueryCompletion *completionTag) ExprContext *executorExpressionContext = GetPerTupleExprContext(executorState); /* set up the destination for the COPY */ + const bool publishableData = true; CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(tableId, columnNameList, partitionColumnIndex, - executorState, NULL); + executorState, NULL, + publishableData); /* if the user specified an explicit append-to_shard option, write to it */ uint64 appendShardId = ProcessAppendToShardOption(tableId, copyStatement); @@ -1934,7 +1939,7 @@ CopyFlushOutput(CopyOutState cstate, char *start, char *pointer) CitusCopyDestReceiver * CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColumnIndex, EState *executorState, - char *intermediateResultIdPrefix) + char *intermediateResultIdPrefix, bool isPublishable) { CitusCopyDestReceiver *copyDest = (CitusCopyDestReceiver *) palloc0( sizeof(CitusCopyDestReceiver)); @@ -1953,6 +1958,7 @@ CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColu copyDest->executorState = executorState; 
copyDest->colocatedIntermediateResultIdPrefix = intermediateResultIdPrefix; copyDest->memoryContext = CurrentMemoryContext; + copyDest->isPublishable = isPublishable; return copyDest; } @@ -2318,7 +2324,9 @@ CitusSendTupleToPlacements(TupleTableSlot *slot, CitusCopyDestReceiver *copyDest &cachedShardStateFound, copyDest->shouldUseLocalCopy, copyDest->copyOutState, - isColocatedIntermediateResult); + isColocatedIntermediateResult, + copyDest->isPublishable); + if (!cachedShardStateFound) { firstTupleInShard = true; @@ -2751,6 +2759,11 @@ ShutdownCopyConnectionState(CopyConnectionState *connectionState, if (activePlacementState != NULL) { EndPlacementStateCopyCommand(activePlacementState, copyOutState); + if (!copyDest->isPublishable) + { + ResetReplicationOriginRemoteSession( + activePlacementState->connectionState->connection); + } } dlist_foreach(iter, &connectionState->bufferedPlacementList) @@ -2764,6 +2777,10 @@ ShutdownCopyConnectionState(CopyConnectionState *connectionState, SendCopyDataToPlacement(placementState->data, shardId, connectionState->connection); EndPlacementStateCopyCommand(placementState, copyOutState); + if (!copyDest->isPublishable) + { + ResetReplicationOriginRemoteSession(connectionState->connection); + } } } @@ -3436,7 +3453,7 @@ static CopyShardState * GetShardState(uint64 shardId, HTAB *shardStateHash, HTAB *connectionStateHash, bool *found, bool shouldUseLocalCopy, CopyOutState copyOutState, - bool isColocatedIntermediateResult) + bool isColocatedIntermediateResult, bool isPublishable) { CopyShardState *shardState = (CopyShardState *) hash_search(shardStateHash, &shardId, HASH_ENTER, found); @@ -3444,7 +3461,8 @@ GetShardState(uint64 shardId, HTAB *shardStateHash, { InitializeCopyShardState(shardState, connectionStateHash, shardId, shouldUseLocalCopy, - copyOutState, isColocatedIntermediateResult); + copyOutState, isColocatedIntermediateResult, + isPublishable); } return shardState; @@ -3461,7 +3479,8 @@ InitializeCopyShardState(CopyShardState *shardState, HTAB *connectionStateHash, uint64 shardId, bool shouldUseLocalCopy, CopyOutState copyOutState, - bool colocatedIntermediateResult) + bool colocatedIntermediateResult, + bool isPublishable) { ListCell *placementCell = NULL; int failedPlacementCount = 0; @@ -3532,6 +3551,11 @@ InitializeCopyShardState(CopyShardState *shardState, RemoteTransactionBeginIfNecessary(connection); } + if (!isPublishable) + { + SetupReplicationOriginRemoteSession(connection); + } + CopyPlacementState *placementState = palloc0(sizeof(CopyPlacementState)); placementState->shardState = shardState; placementState->data = makeStringInfo(); diff --git a/src/backend/distributed/commands/publication.c b/src/backend/distributed/commands/publication.c new file mode 100644 index 000000000..581f7f874 --- /dev/null +++ b/src/backend/distributed/commands/publication.c @@ -0,0 +1,634 @@ +/*------------------------------------------------------------------------- + * + * publication.c + * Commands for creating publications + * + * Copyright (c) Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "miscadmin.h" + +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_rel.h" +#include "distributed/commands.h" +#include "distributed/deparser.h" +#include "distributed/listutils.h" +#include "distributed/metadata_utility.h" +#include "distributed/metadata_sync.h" +#include "distributed/metadata/distobject.h" +#include "distributed/reference_table_utils.h" +#include "distributed/worker_create_or_replace.h" +#include "nodes/makefuncs.h" +#include "nodes/parsenodes.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + +#include "pg_version_compat.h" + + +static CreatePublicationStmt * BuildCreatePublicationStmt(Oid publicationId); +#if (PG_VERSION_NUM >= PG_VERSION_15) +static PublicationObjSpec * BuildPublicationRelationObjSpec(Oid relationId, + Oid publicationId, + bool tableOnly); +#endif +static void AppendPublishOptionList(StringInfo str, List *strings); +static char * AlterPublicationOwnerCommand(Oid publicationId); +static bool ShouldPropagateCreatePublication(CreatePublicationStmt *stmt); +static List * ObjectAddressForPublicationName(char *publicationName, bool missingOk); + + +/* + * PostProcessCreatePublicationStmt handles CREATE PUBLICATION statements + * that contain distributed tables. + */ +List * +PostProcessCreatePublicationStmt(Node *node, const char *queryString) +{ + CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node); + + if (!ShouldPropagateCreatePublication(stmt)) + { + /* should not propagate right now */ + return NIL; + } + + /* call into CreatePublicationStmtObjectAddress */ + List *publicationAddresses = GetObjectAddressListFromParseTree(node, false, true); + + /* the code-path only supports a single object */ + Assert(list_length(publicationAddresses) == 1); + + if (IsAnyObjectAddressOwnedByExtension(publicationAddresses, NULL)) + { + /* should not propagate publications owned by extensions */ + return NIL; + } + + EnsureAllObjectDependenciesExistOnAllNodes(publicationAddresses); + + const ObjectAddress *pubAddress = linitial(publicationAddresses); + + List *commands = NIL; + commands = lappend(commands, DISABLE_DDL_PROPAGATION); + commands = lappend(commands, CreatePublicationDDLCommand(pubAddress->objectId)); + commands = lappend(commands, ENABLE_DDL_PROPAGATION); + + return NodeDDLTaskList(NON_COORDINATOR_NODES, commands); +} + + +/* + * CreatePublicationDDLCommandsIdempotent returns a list of DDL statements to be + * executed on a node to recreate the publication addressed by the publicationAddress. + */ +List * +CreatePublicationDDLCommandsIdempotent(const ObjectAddress *publicationAddress) +{ + Assert(publicationAddress->classId == PublicationRelationId); + + char *ddlCommand = + CreatePublicationDDLCommand(publicationAddress->objectId); + + char *alterPublicationOwnerSQL = + AlterPublicationOwnerCommand(publicationAddress->objectId); + + return list_make2( + WrapCreateOrReplace(ddlCommand), + alterPublicationOwnerSQL); +} + + +/* + * CreatePublicationDDLCommand returns the CREATE PUBLICATION string that + * can be used to recreate a given publication. 
+ */ +char * +CreatePublicationDDLCommand(Oid publicationId) +{ + CreatePublicationStmt *createPubStmt = BuildCreatePublicationStmt(publicationId); + + /* we took the WHERE clause from the catalog where it is already transformed */ + bool whereClauseRequiresTransform = false; + + /* only propagate Citus tables in publication */ + bool includeLocalTables = false; + + return DeparseCreatePublicationStmtExtended((Node *) createPubStmt, + whereClauseRequiresTransform, + includeLocalTables); +} + + +/* + * BuildCreatePublicationStmt constructs a CreatePublicationStmt struct for the + * given publication. + */ +static CreatePublicationStmt * +BuildCreatePublicationStmt(Oid publicationId) +{ + CreatePublicationStmt *createPubStmt = makeNode(CreatePublicationStmt); + + HeapTuple publicationTuple = + SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(publicationId)); + + if (!HeapTupleIsValid(publicationTuple)) + { + ereport(ERROR, (errmsg("cannot find publication with oid: %d", publicationId))); + } + + Form_pg_publication publicationForm = + (Form_pg_publication) GETSTRUCT(publicationTuple); + + /* CREATE PUBLICATION */ + createPubStmt->pubname = pstrdup(NameStr(publicationForm->pubname)); + + /* FOR ALL TABLES */ + createPubStmt->for_all_tables = publicationForm->puballtables; + + ReleaseSysCache(publicationTuple); + +#if (PG_VERSION_NUM >= PG_VERSION_15) + List *schemaIds = GetPublicationSchemas(publicationId); + Oid schemaId = InvalidOid; + + foreach_oid(schemaId, schemaIds) + { + char *schemaName = get_namespace_name(schemaId); + + PublicationObjSpec *publicationObject = makeNode(PublicationObjSpec); + publicationObject->pubobjtype = PUBLICATIONOBJ_TABLES_IN_SCHEMA; + publicationObject->pubtable = NULL; + publicationObject->name = schemaName; + publicationObject->location = -1; + + createPubStmt->pubobjects = lappend(createPubStmt->pubobjects, publicationObject); + } +#endif + + List *relationIds = GetPublicationRelations(publicationId, + publicationForm->pubviaroot ? + PUBLICATION_PART_ROOT : + PUBLICATION_PART_LEAF); + Oid relationId = InvalidOid; + int citusTableCount PG_USED_FOR_ASSERTS_ONLY = 0; + + /* mainly for consistent ordering in test output */ + relationIds = SortList(relationIds, CompareOids); + + foreach_oid(relationId, relationIds) + { +#if (PG_VERSION_NUM >= PG_VERSION_15) + bool tableOnly = false; + + /* since postgres 15, tables can have a column list and filter */ + PublicationObjSpec *publicationObject = + BuildPublicationRelationObjSpec(relationId, publicationId, tableOnly); + + createPubStmt->pubobjects = lappend(createPubStmt->pubobjects, publicationObject); +#else + + /* before postgres 15, only full tables are supported */ + char *schemaName = get_namespace_name(get_rel_namespace(relationId)); + char *tableName = get_rel_name(relationId); + RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1); + + createPubStmt->tables = lappend(createPubStmt->tables, rangeVar); +#endif + + if (IsCitusTable(relationId)) + { + citusTableCount++; + } + } + + /* WITH (publish_via_partition_root = true) option */ + bool publishViaRoot = publicationForm->pubviaroot; + char *publishViaRootString = publishViaRoot ? 
"true" : "false"; + DefElem *pubViaRootOption = makeDefElem("publish_via_partition_root", + (Node *) makeString(publishViaRootString), + -1); + createPubStmt->options = lappend(createPubStmt->options, pubViaRootOption); + + /* WITH (publish = 'insert, update, delete, truncate') option */ + List *publishList = NIL; + + if (publicationForm->pubinsert) + { + publishList = lappend(publishList, makeString("insert")); + } + + if (publicationForm->pubupdate) + { + publishList = lappend(publishList, makeString("update")); + } + + if (publicationForm->pubdelete) + { + publishList = lappend(publishList, makeString("delete")); + } + + if (publicationForm->pubtruncate) + { + publishList = lappend(publishList, makeString("truncate")); + } + + if (list_length(publishList) > 0) + { + StringInfo optionValue = makeStringInfo(); + AppendPublishOptionList(optionValue, publishList); + + DefElem *publishOption = makeDefElem("publish", + (Node *) makeString(optionValue->data), -1); + createPubStmt->options = lappend(createPubStmt->options, publishOption); + } + + + return createPubStmt; +} + + +/* + * AppendPublishOptionList appends a list of publication options in + * comma-separate form. + */ +static void +AppendPublishOptionList(StringInfo str, List *options) +{ + ListCell *stringCell = NULL; + foreach(stringCell, options) + { + const char *string = strVal(lfirst(stringCell)); + if (stringCell != list_head(options)) + { + appendStringInfoString(str, ", "); + } + + /* we cannot escape these strings */ + appendStringInfoString(str, string); + } +} + + +#if (PG_VERSION_NUM >= PG_VERSION_15) + +/* + * BuildPublicationRelationObjSpec returns a PublicationObjSpec that + * can be included in a CREATE or ALTER PUBLICATION statement. + */ +static PublicationObjSpec * +BuildPublicationRelationObjSpec(Oid relationId, Oid publicationId, + bool tableOnly) +{ + HeapTuple pubRelationTuple = SearchSysCache2(PUBLICATIONRELMAP, + ObjectIdGetDatum(relationId), + ObjectIdGetDatum(publicationId)); + if (!HeapTupleIsValid(pubRelationTuple)) + { + ereport(ERROR, (errmsg("cannot find relation with oid %d in publication " + "with oid %d", relationId, publicationId))); + } + + List *columnNameList = NIL; + Node *whereClause = NULL; + + /* build the column list */ + if (!tableOnly) + { + bool isNull = false; + Datum attributesDatum = SysCacheGetAttr(PUBLICATIONRELMAP, pubRelationTuple, + Anum_pg_publication_rel_prattrs, + &isNull); + if (!isNull) + { + ArrayType *attributesArray = DatumGetArrayTypeP(attributesDatum); + int attributeCount = ARR_DIMS(attributesArray)[0]; + int16 *elems = (int16 *) ARR_DATA_PTR(attributesArray); + + for (int attNumIndex = 0; attNumIndex < attributeCount; attNumIndex++) + { + AttrNumber attributeNumber = elems[attNumIndex]; + char *columnName = get_attname(relationId, attributeNumber, false); + + columnNameList = lappend(columnNameList, makeString(columnName)); + } + } + + /* build the WHERE clause */ + Datum whereClauseDatum = SysCacheGetAttr(PUBLICATIONRELMAP, pubRelationTuple, + Anum_pg_publication_rel_prqual, + &isNull); + if (!isNull) + { + /* + * We use the already-transformed parse tree form here, which does + * not match regular CreatePublicationStmt + */ + whereClause = stringToNode(TextDatumGetCString(whereClauseDatum)); + } + } + + ReleaseSysCache(pubRelationTuple); + + char *schemaName = get_namespace_name(get_rel_namespace(relationId)); + char *tableName = get_rel_name(relationId); + RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1); + + /* build the FOR TABLE */ + PublicationTable 
 *publicationTable =
+		makeNode(PublicationTable);
+	publicationTable->relation = rangeVar;
+	publicationTable->whereClause = whereClause;
+	publicationTable->columns = columnNameList;
+
+	PublicationObjSpec *publicationObject = makeNode(PublicationObjSpec);
+	publicationObject->pubobjtype = PUBLICATIONOBJ_TABLE;
+	publicationObject->pubtable = publicationTable;
+	publicationObject->name = NULL;
+	publicationObject->location = -1;
+
+	return publicationObject;
+}
+
+
+#endif
+
+
+/*
+ * PreprocessAlterPublicationStmt handles ALTER PUBLICATION statements
+ * in a way that is mostly similar to PreprocessAlterDistributedObjectStmt,
+ * except we do not ensure sequential mode (publications do not interact with
+ * shards) and can handle NULL deparse commands for ALTER PUBLICATION commands
+ * that only involve local tables.
+ */
+List *
+PreprocessAlterPublicationStmt(Node *stmt, const char *queryString,
+							   ProcessUtilityContext processUtilityContext)
+{
+	List *addresses = GetObjectAddressListFromParseTree(stmt, false, false);
+
+	/* the code-path only supports a single object */
+	Assert(list_length(addresses) == 1);
+
+	if (!ShouldPropagateAnyObject(addresses))
+	{
+		return NIL;
+	}
+
+	EnsureCoordinator();
+	QualifyTreeNode(stmt);
+
+	const char *sql = DeparseTreeNode((Node *) stmt);
+	if (sql == NULL)
+	{
+		/*
+		 * Deparsing logic decided that there is nothing to propagate, e.g.
+		 * because the command only concerns local tables.
+		 */
+		return NIL;
+	}
+
+	List *commands = list_make3(DISABLE_DDL_PROPAGATION,
+								(void *) sql,
+								ENABLE_DDL_PROPAGATION);
+
+	return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
+}
+
+
+/*
+ * GetAlterPublicationDDLCommandsForTable gets a list of ALTER PUBLICATION .. ADD/DROP
+ * commands for the given table.
+ *
+ * If isAdd is true, it returns ALTER PUBLICATION .. ADD TABLE commands for all
+ * publications.
+ *
+ * Otherwise, it returns ALTER PUBLICATION .. DROP TABLE commands for all
+ * publications.
+ */
+List *
+GetAlterPublicationDDLCommandsForTable(Oid relationId, bool isAdd)
+{
+	List *commands = NIL;
+
+	List *publicationIds = GetRelationPublications(relationId);
+	Oid publicationId = InvalidOid;
+
+	foreach_oid(publicationId, publicationIds)
+	{
+		char *command = GetAlterPublicationTableDDLCommand(publicationId,
+														   relationId, isAdd);
+
+		commands = lappend(commands, command);
+	}
+
+	return commands;
+}
+
+
+/*
+ * GetAlterPublicationTableDDLCommand generates an ALTER PUBLICATION .. ADD/DROP TABLE
+ * command for the given publication and relation ID.
+ *
+ * If isAdd is true, it returns an ALTER PUBLICATION .. ADD TABLE command.
+ * Otherwise, it returns an ALTER PUBLICATION .. DROP TABLE command.
+ */ +char * +GetAlterPublicationTableDDLCommand(Oid publicationId, Oid relationId, + bool isAdd) +{ + HeapTuple pubTuple = SearchSysCache1(PUBLICATIONOID, + ObjectIdGetDatum(publicationId)); + if (!HeapTupleIsValid(pubTuple)) + { + ereport(ERROR, (errmsg("cannot find publication with oid: %d", + publicationId))); + } + + Form_pg_publication pubForm = (Form_pg_publication) GETSTRUCT(pubTuple); + + AlterPublicationStmt *alterPubStmt = makeNode(AlterPublicationStmt); + alterPubStmt->pubname = pstrdup(NameStr(pubForm->pubname)); + + ReleaseSysCache(pubTuple); + +#if (PG_VERSION_NUM >= PG_VERSION_15) + bool tableOnly = !isAdd; + + /* since postgres 15, tables can have a column list and filter */ + PublicationObjSpec *publicationObject = + BuildPublicationRelationObjSpec(relationId, publicationId, tableOnly); + + alterPubStmt->pubobjects = lappend(alterPubStmt->pubobjects, publicationObject); + alterPubStmt->action = isAdd ? AP_AddObjects : AP_DropObjects; +#else + + /* before postgres 15, only full tables are supported */ + char *schemaName = get_namespace_name(get_rel_namespace(relationId)); + char *tableName = get_rel_name(relationId); + RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1); + + alterPubStmt->tables = lappend(alterPubStmt->tables, rangeVar); + alterPubStmt->tableAction = isAdd ? DEFELEM_ADD : DEFELEM_DROP; +#endif + + /* we take the WHERE clause from the catalog where it is already transformed */ + bool whereClauseNeedsTransform = false; + + /* + * We use these commands to restore publications before/after transforming a + * table, including transformations to/from local tables. + */ + bool includeLocalTables = true; + + char *command = DeparseAlterPublicationStmtExtended((Node *) alterPubStmt, + whereClauseNeedsTransform, + includeLocalTables); + + return command; +} + + +/* + * AlterPublicationOwnerCommand returns "ALTER PUBLICATION .. OWNER TO .." + * statement for the specified publication. + */ +static char * +AlterPublicationOwnerCommand(Oid publicationId) +{ + HeapTuple publicationTuple = + SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(publicationId)); + + if (!HeapTupleIsValid(publicationTuple)) + { + ereport(ERROR, (errmsg("cannot find publication with oid: %d", + publicationId))); + } + + Form_pg_publication publicationForm = + (Form_pg_publication) GETSTRUCT(publicationTuple); + + char *publicationName = NameStr(publicationForm->pubname); + Oid publicationOwnerId = publicationForm->pubowner; + + char *publicationOwnerName = GetUserNameFromId(publicationOwnerId, false); + + StringInfo alterCommand = makeStringInfo(); + appendStringInfo(alterCommand, "ALTER PUBLICATION %s OWNER TO %s", + quote_identifier(publicationName), + quote_identifier(publicationOwnerName)); + + ReleaseSysCache(publicationTuple); + + return alterCommand->data; +} + + +/* + * ShouldPropagateCreatePublication tests if we need to propagate a CREATE PUBLICATION + * statement. + */ +static bool +ShouldPropagateCreatePublication(CreatePublicationStmt *stmt) +{ + if (!ShouldPropagate()) + { + return false; + } + + if (!ShouldPropagateCreateInCoordinatedTransction()) + { + return false; + } + + return true; +} + + +/* + * AlterPublicationStmtObjectAddress generates the object address for the + * publication altered by a regular ALTER PUBLICATION .. statement. 
+ */ +List * +AlterPublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess) +{ + AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node); + + return ObjectAddressForPublicationName(stmt->pubname, missingOk); +} + + +/* + * AlterPublicationOwnerStmtObjectAddress generates the object address for the + * publication altered by the given ALTER PUBLICATION .. OWNER TO statement. + */ +List * +AlterPublicationOwnerStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess) +{ + AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node); + + return ObjectAddressForPublicationName(strVal(stmt->object), missingOk); +} + + +/* + * CreatePublicationStmtObjectAddress generates the object address for the + * publication created by the given CREATE PUBLICATION statement. + */ +List * +CreatePublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess) +{ + CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node); + + return ObjectAddressForPublicationName(stmt->pubname, missingOk); +} + + +/* + * RenamePublicationStmtObjectAddress generates the object address for the + * publication altered by the given ALter PUBLICATION .. RENAME TO statement. + */ +List * +RenamePublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostprocess) +{ + RenameStmt *stmt = castNode(RenameStmt, node); + + return ObjectAddressForPublicationName(strVal(stmt->object), missingOk); +} + + +/* + * ObjectAddressForPublicationName returns the object address for a given publication + * name. + */ +static List * +ObjectAddressForPublicationName(char *publicationName, bool missingOk) +{ + Oid publicationId = InvalidOid; + + HeapTuple publicationTuple = + SearchSysCache1(PUBLICATIONNAME, CStringGetDatum(publicationName)); + if (HeapTupleIsValid(publicationTuple)) + { + Form_pg_publication publicationForm = + (Form_pg_publication) GETSTRUCT(publicationTuple); + publicationId = publicationForm->oid; + + ReleaseSysCache(publicationTuple); + } + else if (!missingOk) + { + /* it should have just been created */ + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", publicationName))); + } + + ObjectAddress *address = palloc0(sizeof(ObjectAddress)); + ObjectAddressSet(*address, PublicationRelationId, publicationId); + + return list_make1(address); +} diff --git a/src/backend/distributed/commands/sequence.c b/src/backend/distributed/commands/sequence.c index e8c217bb5..f1757bb62 100644 --- a/src/backend/distributed/commands/sequence.c +++ b/src/backend/distributed/commands/sequence.c @@ -33,7 +33,8 @@ /* Local functions forward declarations for helper functions */ static bool OptionsSpecifyOwnedBy(List *optionList, Oid *ownedByTableId); -static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress); +static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char + depType); static List * FilterDistributedSequences(GrantStmt *stmt); @@ -183,7 +184,7 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList, char *columnName = NameStr(attributeForm->attname); List *columnOwnedSequences = - getOwnedSequences_internal(relationId, attributeIndex + 1, 0); + getOwnedSequences_internal(relationId, attributeIndex + 1, DEPENDENCY_AUTO); if (attributeForm->atthasdef && list_length(columnOwnedSequences) == 0) { @@ -453,21 +454,22 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString, /* the code-path only supports a single object */ Assert(list_length(addresses) == 1); + 
/* We have already asserted that we have exactly 1 address in the addresses. */ + ObjectAddress *address = linitial(addresses); + /* error out if the sequence is distributed */ - if (IsAnyObjectDistributed(addresses)) + if (IsAnyObjectDistributed(addresses) || SequenceUsedInDistributedTable(address, + DEPENDENCY_INTERNAL)) { ereport(ERROR, (errmsg( "Altering a distributed sequence is currently not supported."))); } - /* We have already asserted that we have exactly 1 address in the addresses. */ - ObjectAddress *address = linitial(addresses); - /* * error out if the sequence is used in a distributed table * and this is an ALTER SEQUENCE .. AS .. statement */ - Oid citusTableId = SequenceUsedInDistributedTable(address); + Oid citusTableId = SequenceUsedInDistributedTable(address, DEPENDENCY_AUTO); if (citusTableId != InvalidOid) { List *options = stmt->options; @@ -497,16 +499,19 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString, * SequenceUsedInDistributedTable returns true if the argument sequence * is used as the default value of a column in a distributed table. * Returns false otherwise + * See DependencyType for the possible values of depType. + * We use DEPENDENCY_INTERNAL for sequences created by identity column. + * DEPENDENCY_AUTO for regular sequences. */ static Oid -SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress) +SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char depType) { List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE); Oid citusTableId = InvalidOid; foreach_oid(citusTableId, citusTableIdList) { List *seqInfoList = NIL; - GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0); + GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, depType); SequenceInfo *seqInfo = NULL; foreach_ptr(seqInfo, seqInfoList) { diff --git a/src/backend/distributed/commands/table.c b/src/backend/distributed/commands/table.c index 39a652f10..6d5fcda3f 100644 --- a/src/backend/distributed/commands/table.c +++ b/src/backend/distributed/commands/table.c @@ -75,7 +75,7 @@ static void DistributePartitionUsingParent(Oid parentRelationId, static void ErrorIfMultiLevelPartitioning(Oid parentRelationId, Oid partitionRelationId); static void ErrorIfAttachCitusTableToPgLocalTable(Oid parentRelationId, Oid partitionRelationId); -static bool AlterTableDefinesFKeyBetweenPostgresAndNonDistTable( +static bool ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef( AlterTableStmt *alterTableStatement); static bool ShouldMarkConnectedRelationsNotAutoConverted(Oid leftRelationId, Oid rightRelationId); @@ -1119,7 +1119,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, if (ShouldEnableLocalReferenceForeignKeys() && processUtilityContext != PROCESS_UTILITY_SUBCOMMAND && - AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(alterTableStatement)) + ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(alterTableStatement)) { /* * We don't process subcommands generated by postgres. @@ -1378,29 +1378,6 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, } } - /* - * We check for ADD COLUMN .. GENERATED .. 
AS IDENTITY expr - * since it uses a sequence as an internal dependency - * we should deparse the statement - */ - constraint = NULL; - foreach_ptr(constraint, columnConstraints) - { - if (constraint->contype == CONSTR_IDENTITY) - { - deparseAT = true; - useInitialDDLCommandString = false; - - /* - * Since we don't support constraints for AT_AddColumn - * we have to set is_not_null to true explicitly for identity columns - */ - ColumnDef *newColDef = copyObject(columnDefinition); - newColDef->constraints = NULL; - newColDef->is_not_null = true; - newCmd->def = (Node *) newColDef; - } - } /* * We check for ADD COLUMN .. SERIAL pseudo-type @@ -1584,12 +1561,12 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, /* - * AlterTableDefinesFKeyBetweenPostgresAndNonDistTable returns true if given + * ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef returns true if given * alter table command defines foreign key between a postgres table and a * reference or citus local table. */ static bool -AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableStatement) +ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(AlterTableStmt *alterTableStatement) { List *foreignKeyConstraintList = GetAlterTableAddFKeyConstraintList(alterTableStatement); @@ -1607,9 +1584,12 @@ AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableSt if (!IsCitusTable(leftRelationId)) { return RelationIdListContainsCitusTableType(rightRelationIdList, - CITUS_TABLE_WITH_NO_DIST_KEY); + CITUS_LOCAL_TABLE) || + RelationIdListContainsCitusTableType(rightRelationIdList, + REFERENCE_TABLE); } - else if (IsCitusTableType(leftRelationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (IsCitusTableType(leftRelationId, CITUS_LOCAL_TABLE) || + IsCitusTableType(leftRelationId, REFERENCE_TABLE)) { return RelationIdListContainsPostgresTable(rightRelationIdList); } @@ -2539,34 +2519,6 @@ PostprocessAlterTableStmt(AlterTableStmt *alterTableStatement) } } } - - /* - * We check for ADD COLUMN .. GENERATED AS IDENTITY expr - * since it uses a seqeunce as an internal dependency - */ - constraint = NULL; - foreach_ptr(constraint, columnConstraints) - { - if (constraint->contype == CONSTR_IDENTITY) - { - AttrNumber attnum = get_attnum(relationId, - columnDefinition->colname); - bool missing_ok = false; - Oid seqOid = getIdentitySequence(relationId, attnum, missing_ok); - - if (ShouldSyncTableMetadata(relationId)) - { - needMetadataSyncForNewSequences = true; - alterTableDefaultNextvalCmd = - GetAddColumnWithNextvalDefaultCmd(seqOid, - relationId, - columnDefinition - ->colname, - columnDefinition - ->typeName); - } - } - } } /* * We check for ALTER COLUMN .. 
SET DEFAULT nextval('user_defined_seq') @@ -3222,6 +3174,17 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement) { if (columnConstraint->contype == CONSTR_IDENTITY) { + /* + * We currently don't support adding an identity column for an MX table + */ + if (ShouldSyncTableMetadata(relationId)) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "cannot execute ADD COLUMN commands involving identity" + " columns when metadata is synchronized to workers"))); + } + /* * Currently we don't support backfilling the new identity column with default values * if the table is not empty @@ -3352,7 +3315,8 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement) */ AttrNumber attnum = get_attnum(relationId, command->name); List *seqInfoList = NIL; - GetDependentSequencesWithRelation(relationId, &seqInfoList, attnum); + GetDependentSequencesWithRelation(relationId, &seqInfoList, attnum, + DEPENDENCY_AUTO); if (seqInfoList != NIL) { ereport(ERROR, (errmsg("cannot execute ALTER COLUMN TYPE .. command " @@ -3666,7 +3630,7 @@ SetupExecutionModeForAlterTable(Oid relationId, AlterTableCmd *command) * sequential mode. */ if (executeSequentially && - !IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) && + HasDistributionKey(relationId) && ParallelQueryExecutedInTransaction()) { char *relationName = get_rel_name(relationId); @@ -4011,3 +3975,59 @@ MakeNameListFromRangeVar(const RangeVar *rel) return list_make1(makeString(rel->relname)); } } + + +/* + * ErrorIfTableHasUnsupportedIdentityColumn errors out if the given table has any identity column other than bigint identity column. + */ +void +ErrorIfTableHasUnsupportedIdentityColumn(Oid relationId) +{ + Relation relation = relation_open(relationId, AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(relation); + + for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts; + attributeIndex++) + { + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex); + + if (attributeForm->attidentity && attributeForm->atttypid != INT8OID) + { + char *qualifiedRelationName = generate_qualified_relation_name(relationId); + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "cannot complete operation on %s with smallint/int identity column", + qualifiedRelationName), + errhint( + "Use bigint identity column instead."))); + } + } + + relation_close(relation, NoLock); +} + + +/* + * ErrorIfTableHasIdentityColumn errors out if the given table has identity column + */ +void +ErrorIfTableHasIdentityColumn(Oid relationId) +{ + Relation relation = relation_open(relationId, AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(relation); + + for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts; + attributeIndex++) + { + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex); + + if (attributeForm->attidentity) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "cannot complete operation on a table with identity column"))); + } + } + + relation_close(relation, NoLock); +} diff --git a/src/backend/distributed/commands/truncate.c b/src/backend/distributed/commands/truncate.c index 0993c287f..52f769a11 100644 --- a/src/backend/distributed/commands/truncate.c +++ b/src/backend/distributed/commands/truncate.c @@ -324,7 +324,7 @@ ExecuteTruncateStmtSequentialIfNecessary(TruncateStmt *command) { Oid relationId = RangeVarGetRelid(rangeVar, NoLock, failOK); - if (IsCitusTableType(relationId, 
CITUS_TABLE_WITH_NO_DIST_KEY) && + if (IsCitusTable(relationId) && !HasDistributionKey(relationId) && TableReferenced(relationId)) { char *relationName = get_rel_name(relationId); diff --git a/src/backend/distributed/commands/utility_hook.c b/src/backend/distributed/commands/utility_hook.c index 899384ad5..cdef7ab97 100644 --- a/src/backend/distributed/commands/utility_hook.c +++ b/src/backend/distributed/commands/utility_hook.c @@ -53,6 +53,7 @@ #include "distributed/coordinator_protocol.h" #include "distributed/deparser.h" #include "distributed/deparse_shard_query.h" +#include "distributed/executor_util.h" #include "distributed/foreign_key_relationship.h" #include "distributed/listutils.h" #include "distributed/local_executor.h" diff --git a/src/backend/distributed/connection/connection_management.c b/src/backend/distributed/connection/connection_management.c index 8ab35ca42..e4aca3ee7 100644 --- a/src/backend/distributed/connection/connection_management.c +++ b/src/backend/distributed/connection/connection_management.c @@ -1202,6 +1202,17 @@ FinishConnectionEstablishment(MultiConnection *connection) } +/* + * ForceConnectionCloseAtTransactionEnd marks connection to be closed at the end of the + * transaction. + */ +void +ForceConnectionCloseAtTransactionEnd(MultiConnection *connection) +{ + connection->forceCloseAtTransactionEnd = true; +} + + /* * ClaimConnectionExclusively signals that this connection is actively being * used. That means it'll not be, again, returned by @@ -1484,6 +1495,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit) * - Current cached connections is already at MaxCachedConnectionsPerWorker * - Connection is forced to close at the end of transaction * - Connection is not in OK state + * - Connection has a replication origin setup * - A transaction is still in progress (usually because we are cancelling a distributed transaction) * - A connection reached its maximum lifetime */ @@ -1503,6 +1515,7 @@ ShouldShutdownConnection(MultiConnection *connection, const int cachedConnection PQstatus(connection->pgConn) != CONNECTION_OK || !RemoteTransactionIdle(connection) || connection->requiresReplication || + connection->isReplicationOriginSessionSetup || (MaxCachedConnectionLifetime >= 0 && MillisecondsToTimeout(connection->connectionEstablishmentStart, MaxCachedConnectionLifetime) <= 0); diff --git a/src/backend/distributed/connection/remote_commands.c b/src/backend/distributed/connection/remote_commands.c index 906d78e42..1dfd51781 100644 --- a/src/backend/distributed/connection/remote_commands.c +++ b/src/backend/distributed/connection/remote_commands.c @@ -573,6 +573,47 @@ SendRemoteCommand(MultiConnection *connection, const char *command) } +/* + * ExecuteRemoteCommandAndCheckResult executes the given command in the remote node and + * checks if the result is equal to the expected result. If the result is equal to the + * expected result, the function returns true, otherwise it returns false. 
+ */ +bool +ExecuteRemoteCommandAndCheckResult(MultiConnection *connection, char *command, + char *expected) +{ + if (!SendRemoteCommand(connection, command)) + { + /* if we cannot connect, we warn and report false */ + ReportConnectionError(connection, WARNING); + return false; + } + bool raiseInterrupts = true; + PGresult *queryResult = GetRemoteCommandResult(connection, raiseInterrupts); + + /* if remote node throws an error, we also throw an error */ + if (!IsResponseOK(queryResult)) + { + ReportResultError(connection, queryResult, ERROR); + } + + StringInfo queryResultString = makeStringInfo(); + + /* Evaluate the queryResult and store it into the queryResultString */ + bool success = EvaluateSingleQueryResult(connection, queryResult, queryResultString); + bool result = false; + if (success && strcmp(queryResultString->data, expected) == 0) + { + result = true; + } + + PQclear(queryResult); + ForgetResults(connection); + + return result; +} + + /* * ReadFirstColumnAsText reads the first column of result tuples from the given * PGresult struct and returns them in a StringInfo list. diff --git a/src/backend/distributed/deparser/citus_ruleutils.c b/src/backend/distributed/deparser/citus_ruleutils.c index ada77b098..05e483766 100644 --- a/src/backend/distributed/deparser/citus_ruleutils.c +++ b/src/backend/distributed/deparser/citus_ruleutils.c @@ -304,10 +304,7 @@ pg_get_sequencedef(Oid sequenceRelationId) * When it's WORKER_NEXTVAL_SEQUENCE_DEFAULTS, the function creates the DEFAULT * clause using worker_nextval('sequence') and not nextval('sequence') * When IncludeIdentities is NO_IDENTITY, the function does not include identity column - * specifications. When it's INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, the function - * uses sequences and set them as default values for identity columns by using exactly - * the same approach with worker_nextval('sequence') & nextval('sequence') logic - * desribed above. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses. + * specifications. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses. */ char * pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults @@ -403,26 +400,9 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults Oid seqOid = getIdentitySequence(RelationGetRelid(relation), attributeForm->attnum, missing_ok); - char *sequenceName = generate_qualified_relation_name(seqOid); - - if (includeIdentityDefaults == INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS) - { - if (pg_get_sequencedef(seqOid)->seqtypid != INT8OID) - { - appendStringInfo(&buffer, - " DEFAULT worker_nextval(%s::regclass)", - quote_literal_cstr(sequenceName)); - } - else - { - appendStringInfo(&buffer, " DEFAULT nextval(%s::regclass)", - quote_literal_cstr(sequenceName)); - } - } - else if (includeIdentityDefaults == INCLUDE_IDENTITY) + if (includeIdentityDefaults == INCLUDE_IDENTITY) { Form_pg_sequence pgSequenceForm = pg_get_sequencedef(seqOid); - uint64 sequenceStart = nextval_internal(seqOid, false); char *sequenceDef = psprintf( " GENERATED %s AS IDENTITY (INCREMENT BY " INT64_FORMAT \ " MINVALUE " INT64_FORMAT " MAXVALUE " @@ -433,7 +413,8 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults "ALWAYS" : "BY DEFAULT", pgSequenceForm->seqincrement, pgSequenceForm->seqmin, - pgSequenceForm->seqmax, sequenceStart, + pgSequenceForm->seqmax, + pgSequenceForm->seqstart, pgSequenceForm->seqcache, pgSequenceForm->seqcycle ? 
"" : "NO "); @@ -1391,7 +1372,7 @@ convert_aclright_to_string(int aclright) /* * contain_nextval_expression_walker walks over expression tree and returns - * true if it contains call to 'nextval' function. + * true if it contains call to 'nextval' function or it has an identity column. */ bool contain_nextval_expression_walker(Node *node, void *context) @@ -1401,6 +1382,13 @@ contain_nextval_expression_walker(Node *node, void *context) return false; } + /* check if the node contains an identity column */ + if (IsA(node, NextValueExpr)) + { + return true; + } + + /* check if the node contains call to 'nextval' */ if (IsA(node, FuncExpr)) { FuncExpr *funcExpr = (FuncExpr *) node; diff --git a/src/backend/distributed/deparser/deparse_publication_stmts.c b/src/backend/distributed/deparser/deparse_publication_stmts.c new file mode 100644 index 000000000..deb8e7285 --- /dev/null +++ b/src/backend/distributed/deparser/deparse_publication_stmts.c @@ -0,0 +1,690 @@ +/*------------------------------------------------------------------------- + * + * deparse_publication_stmts.c + * All routines to deparse publication statements. + * + * Copyright (c) Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/relation.h" +#include "catalog/namespace.h" +#include "commands/defrem.h" +#include "distributed/citus_ruleutils.h" +#include "distributed/deparser.h" +#include "distributed/listutils.h" +#include "distributed/namespace_utils.h" +#include "lib/stringinfo.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_node.h" +#include "parser/parse_relation.h" +#include "nodes/value.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/ruleutils.h" + + +static void AppendCreatePublicationStmt(StringInfo buf, CreatePublicationStmt *stmt, + bool whereClauseNeedsTransform, + bool includeLocalTables); +#if (PG_VERSION_NUM >= PG_VERSION_15) +static bool AppendPublicationObjects(StringInfo buf, List *publicationObjects, + bool whereClauseNeedsTransform, + bool includeLocalTables); +static void AppendWhereClauseExpression(StringInfo buf, RangeVar *tableName, + Node *whereClause, + bool whereClauseNeedsTransform); +static void AppendAlterPublicationAction(StringInfo buf, AlterPublicationAction action); +#else +static bool AppendTables(StringInfo buf, List *tables, bool includeLocalTables); +static void AppendDefElemAction(StringInfo buf, DefElemAction action); +#endif +static bool AppendAlterPublicationStmt(StringInfo buf, AlterPublicationStmt *stmt, + bool whereClauseNeedsTransform, + bool includeLocalTables); +static void AppendDropPublicationStmt(StringInfo buf, DropStmt *stmt); +static void AppendRenamePublicationStmt(StringInfo buf, RenameStmt *stmt); +static void AppendAlterPublicationOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt); +static void AppendPublicationOptions(StringInfo stringBuffer, List *optionList); +static void AppendIdentifierList(StringInfo buf, List *objects); + + +/* + * DeparseCreatePublicationStmt builds and returns a string representing a + * CreatePublicationStmt. 
+ */ +char * +DeparseCreatePublicationStmt(Node *node) +{ + /* regular deparsing function takes CREATE PUBLICATION from the parser */ + bool whereClauseNeedsTransform = false; + + /* for regular CREATE PUBLICATION we do not propagate local tables */ + bool includeLocalTables = false; + + return DeparseCreatePublicationStmtExtended(node, whereClauseNeedsTransform, + includeLocalTables); +} + + +/* + * DeparseCreatePublicationStmtExtended builds and returns a string representing a + * CreatePublicationStmt, which may have already-transformed expressions. + */ +char * +DeparseCreatePublicationStmtExtended(Node *node, bool whereClauseNeedsTransform, + bool includeLocalTables) +{ + CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node); + + StringInfoData str = { 0 }; + initStringInfo(&str); + + AppendCreatePublicationStmt(&str, stmt, whereClauseNeedsTransform, + includeLocalTables); + + return str.data; +} + + +/* + * AppendCreatePublicationStmt appends a string representing a + * CreatePublicationStmt to a buffer. + */ +static void +AppendCreatePublicationStmt(StringInfo buf, CreatePublicationStmt *stmt, + bool whereClauseNeedsTransform, + bool includeLocalTables) +{ + appendStringInfo(buf, "CREATE PUBLICATION %s", + quote_identifier(stmt->pubname)); + + if (stmt->for_all_tables) + { + appendStringInfoString(buf, " FOR ALL TABLES"); + } +#if (PG_VERSION_NUM >= PG_VERSION_15) + else if (stmt->pubobjects != NIL) + { + bool hasObjects = false; + PublicationObjSpec *publicationObject = NULL; + + /* + * Check whether there are objects to propagate, mainly to know whether + * we should include "FOR". + */ + foreach_ptr(publicationObject, stmt->pubobjects) + { + if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE) + { + /* FOR TABLE ... */ + PublicationTable *publicationTable = publicationObject->pubtable; + + if (includeLocalTables || + IsCitusTableRangeVar(publicationTable->relation, NoLock, false)) + { + hasObjects = true; + break; + } + } + else + { + hasObjects = true; + break; + } + } + + if (hasObjects) + { + appendStringInfoString(buf, " FOR"); + AppendPublicationObjects(buf, stmt->pubobjects, whereClauseNeedsTransform, + includeLocalTables); + } + } +#else + else if (stmt->tables != NIL) + { + bool hasTables = false; + RangeVar *rangeVar = NULL; + + /* + * Check whether there are tables to propagate, mainly to know whether + * we should include "FOR". + */ + foreach_ptr(rangeVar, stmt->tables) + { + if (includeLocalTables || IsCitusTableRangeVar(rangeVar, NoLock, false)) + { + hasTables = true; + break; + } + } + + if (hasTables) + { + appendStringInfoString(buf, " FOR"); + AppendTables(buf, stmt->tables, includeLocalTables); + } + } +#endif + + if (stmt->options != NIL) + { + appendStringInfoString(buf, " WITH ("); + AppendPublicationOptions(buf, stmt->options); + appendStringInfoString(buf, ")"); + } +} + + +#if (PG_VERSION_NUM >= PG_VERSION_15) + +/* + * AppendPublicationObjects appends a string representing a list of publication + * objects to a buffer. + * + * For instance: TABLE users, departments, TABLES IN SCHEMA production + */ +static bool +AppendPublicationObjects(StringInfo buf, List *publicationObjects, + bool whereClauseNeedsTransform, + bool includeLocalTables) +{ + PublicationObjSpec *publicationObject = NULL; + bool appendedObject = false; + + foreach_ptr(publicationObject, publicationObjects) + { + if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE) + { + /* FOR TABLE ... 
 */
+			PublicationTable *publicationTable = publicationObject->pubtable;
+			RangeVar *rangeVar = publicationTable->relation;
+			char *schemaName = rangeVar->schemaname;
+			char *tableName = rangeVar->relname;
+
+			if (!includeLocalTables && !IsCitusTableRangeVar(rangeVar, NoLock, false))
+			{
+				/* do not propagate local tables */
+				continue;
+			}
+
+			if (schemaName != NULL)
+			{
+				/* qualified table name */
+				appendStringInfo(buf, "%s TABLE %s",
+								 appendedObject ? "," : "",
+								 quote_qualified_identifier(schemaName, tableName));
+			}
+			else
+			{
+				/* unqualified table name */
+				appendStringInfo(buf, "%s TABLE %s",
+								 appendedObject ? "," : "",
+								 quote_identifier(tableName));
+			}
+
+			if (publicationTable->columns != NIL)
+			{
+				appendStringInfoString(buf, " (");
+				AppendIdentifierList(buf, publicationTable->columns);
+				appendStringInfoString(buf, ")");
+			}
+
+			if (publicationTable->whereClause != NULL)
+			{
+				appendStringInfoString(buf, " WHERE (");
+
+				AppendWhereClauseExpression(buf, rangeVar,
+											publicationTable->whereClause,
+											whereClauseNeedsTransform);
+
+				appendStringInfoString(buf, ")");
+			}
+		}
+		else
+		{
+			/* FOR TABLES IN SCHEMA */
+			char *schemaName = publicationObject->name;
+
+			if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLES_IN_CUR_SCHEMA)
+			{
+				List *searchPath = fetch_search_path(false);
+				if (searchPath == NIL)
+				{
+					ereport(ERROR, errcode(ERRCODE_UNDEFINED_SCHEMA),
+							errmsg("no schema has been selected for "
+								   "CURRENT_SCHEMA"));
+				}
+
+				schemaName = get_namespace_name(linitial_oid(searchPath));
+			}
+
+			appendStringInfo(buf, "%s TABLES IN SCHEMA %s",
+							 appendedObject ? "," : "",
+							 quote_identifier(schemaName));
+		}
+
+		appendedObject = true;
+	}
+
+	return appendedObject;
+}
+
+
+/*
+ * AppendWhereClauseExpression appends a deparsed expression that can
+ * contain a filter on the given table. If whereClauseNeedsTransform is set,
+ * the expression is first transformed.
+ */
+static void
+AppendWhereClauseExpression(StringInfo buf, RangeVar *tableName,
+							Node *whereClause, bool whereClauseNeedsTransform)
+{
+	Relation relation = relation_openrv(tableName, AccessShareLock);
+
+	if (whereClauseNeedsTransform)
+	{
+		ParseState *pstate = make_parsestate(NULL);
+		pstate->p_sourcetext = "";
+		ParseNamespaceItem *nsitem = addRangeTableEntryForRelation(pstate,
+																   relation,
+																   AccessShareLock, NULL,
+																   false, false);
+		addNSItemToQuery(pstate, nsitem, false, true, true);
+
+		whereClause = transformWhereClause(pstate,
+										   copyObject(whereClause),
+										   EXPR_KIND_WHERE,
+										   "PUBLICATION WHERE");
+
+		assign_expr_collations(pstate, whereClause);
+	}
+
+	List *relationContext = deparse_context_for(tableName->relname, relation->rd_id);
+
+	PushOverrideEmptySearchPath(CurrentMemoryContext);
+	char *whereClauseString = deparse_expression(whereClause,
+												 relationContext,
+												 true, true);
+	PopOverrideSearchPath();
+
+	appendStringInfoString(buf, whereClauseString);
+
+	relation_close(relation, AccessShareLock);
+}
+
+
+#else
+
+/*
+ * AppendTables appends a string representing a list of tables to a buffer.
+ *
+ * For instance: TABLE users, departments
+ */
+static bool
+AppendTables(StringInfo buf, List *tables, bool includeLocalTables)
+{
+	RangeVar *rangeVar = NULL;
+	bool appendedObject = false;
+
+	foreach_ptr(rangeVar, tables)
+	{
+		if (!includeLocalTables &&
+			!IsCitusTableRangeVar(rangeVar, NoLock, false))
+		{
+			/* do not propagate local tables */
+			continue;
+		}
+
+		char *schemaName = rangeVar->schemaname;
+		char *tableName = rangeVar->relname;
+
+		if (schemaName != NULL)
+		{
+			/* qualified table name */
+			appendStringInfo(buf, "%s %s",
+							 appendedObject ? "," : " TABLE",
+							 quote_qualified_identifier(schemaName, tableName));
+		}
+		else
+		{
+			/* unqualified table name */
+			appendStringInfo(buf, "%s %s",
+							 appendedObject ? "," : " TABLE",
+							 quote_identifier(tableName));
+		}
+
+		appendedObject = true;
+	}
+
+	return appendedObject;
+}
+
+
+#endif
+
+
+/*
+ * DeparseAlterPublicationStmt builds and returns a string representing
+ * an AlterPublicationStmt.
+ */
+char *
+DeparseAlterPublicationStmt(Node *node)
+{
+	/* regular deparsing function takes ALTER PUBLICATION from the parser */
+	bool whereClauseNeedsTransform = true;
+
+	/* for regular ALTER PUBLICATION we do not propagate local tables */
+	bool includeLocalTables = false;
+
+	return DeparseAlterPublicationStmtExtended(node, whereClauseNeedsTransform,
+											   includeLocalTables);
+}
+
+
+/*
+ * DeparseAlterPublicationStmtExtended builds and returns a string representing an
+ * AlterPublicationStmt, which may have already-transformed expressions.
+ */
+char *
+DeparseAlterPublicationStmtExtended(Node *node, bool whereClauseNeedsTransform,
+									bool includeLocalTables)
+{
+	AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
+	StringInfoData str = { 0 };
+	initStringInfo(&str);
+
+	if (!AppendAlterPublicationStmt(&str, stmt, whereClauseNeedsTransform,
+									includeLocalTables))
+	{
+		Assert(!includeLocalTables);
+
+		/*
+		 * When there are no objects to propagate, then there is no
+		 * valid ALTER PUBLICATION to construct.
+		 */
+		return NULL;
+	}
+
+	return str.data;
+}
+
+
+/*
+ * AppendAlterPublicationStmt appends a string representing an AlterPublicationStmt
+ * of the form ALTER PUBLICATION .. ADD/SET/DROP
+ */
+static bool
+AppendAlterPublicationStmt(StringInfo buf, AlterPublicationStmt *stmt,
+						   bool whereClauseNeedsTransform,
+						   bool includeLocalTables)
+{
+	appendStringInfo(buf, "ALTER PUBLICATION %s",
+					 quote_identifier(stmt->pubname));
+
+	if (stmt->options)
+	{
+		appendStringInfoString(buf, " SET (");
+		AppendPublicationOptions(buf, stmt->options);
+		appendStringInfoString(buf, ")");
+
+		/* changing options cannot be combined with other actions */
+		return true;
+	}
+
+#if (PG_VERSION_NUM >= PG_VERSION_15)
+	AppendAlterPublicationAction(buf, stmt->action);
+	return AppendPublicationObjects(buf, stmt->pubobjects, whereClauseNeedsTransform,
+									includeLocalTables);
+#else
+	AppendDefElemAction(buf, stmt->tableAction);
+	return AppendTables(buf, stmt->tables, includeLocalTables);
+#endif
+}
+
+
+#if (PG_VERSION_NUM >= PG_VERSION_15)
+
+/*
+ * AppendAlterPublicationAction appends a string representing an AlterPublicationAction
+ * to a buffer.
+ */ +static void +AppendAlterPublicationAction(StringInfo buf, AlterPublicationAction action) +{ + switch (action) + { + case AP_AddObjects: + { + appendStringInfoString(buf, " ADD"); + break; + } + + case AP_DropObjects: + { + appendStringInfoString(buf, " DROP"); + break; + } + + case AP_SetObjects: + { + appendStringInfoString(buf, " SET"); + break; + } + + default: + { + ereport(ERROR, (errmsg("unrecognized publication action: %d", action))); + } + } +} + + +#else + +/* + * AppendDefElemAction appends a string representing a DefElemAction + * to a buffer. + */ +static void +AppendDefElemAction(StringInfo buf, DefElemAction action) +{ + switch (action) + { + case DEFELEM_ADD: + { + appendStringInfoString(buf, " ADD"); + break; + } + + case DEFELEM_DROP: + { + appendStringInfoString(buf, " DROP"); + break; + } + + case DEFELEM_SET: + { + appendStringInfoString(buf, " SET"); + break; + } + + default: + { + ereport(ERROR, (errmsg("unrecognized publication action: %d", action))); + } + } +} + + +#endif + + +/* + * DeparseDropPublicationStmt builds and returns a string representing the DropStmt + */ +char * +DeparseDropPublicationStmt(Node *node) +{ + DropStmt *stmt = castNode(DropStmt, node); + StringInfoData str = { 0 }; + initStringInfo(&str); + + Assert(stmt->removeType == OBJECT_PUBLICATION); + + AppendDropPublicationStmt(&str, stmt); + + return str.data; +} + + +/* + * AppendDropPublicationStmt appends a string representing the DropStmt to a buffer + */ +static void +AppendDropPublicationStmt(StringInfo buf, DropStmt *stmt) +{ + appendStringInfoString(buf, "DROP PUBLICATION "); + if (stmt->missing_ok) + { + appendStringInfoString(buf, "IF EXISTS "); + } + AppendIdentifierList(buf, stmt->objects); + if (stmt->behavior == DROP_CASCADE) + { + appendStringInfoString(buf, " CASCADE"); + } +} + + +/* + * DeparseRenamePublicationStmt builds and returns a string representing the RenameStmt + */ +char * +DeparseRenamePublicationStmt(Node *node) +{ + RenameStmt *stmt = castNode(RenameStmt, node); + StringInfoData str = { 0 }; + initStringInfo(&str); + + Assert(stmt->renameType == OBJECT_PUBLICATION); + + AppendRenamePublicationStmt(&str, stmt); + + return str.data; +} + + +/* + * AppendRenamePublicationStmt appends a string representing the RenameStmt to a buffer + */ +static void +AppendRenamePublicationStmt(StringInfo buf, RenameStmt *stmt) +{ + appendStringInfo(buf, "ALTER PUBLICATION %s RENAME TO %s;", + quote_identifier(strVal(stmt->object)), + quote_identifier(stmt->newname)); +} + + +/* + * DeparseAlterPublicationOwnerStmt builds and returns a string representing the AlterOwnerStmt + */ +char * +DeparseAlterPublicationOwnerStmt(Node *node) +{ + AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node); + StringInfoData str = { 0 }; + initStringInfo(&str); + + Assert(stmt->objectType == OBJECT_PUBLICATION); + + AppendAlterPublicationOwnerStmt(&str, stmt); + + return str.data; +} + + +/* + * AppendAlterPublicationOwnerStmt appends a string representing the AlterOwnerStmt to a buffer + */ +static void +AppendAlterPublicationOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt) +{ + Assert(stmt->objectType == OBJECT_PUBLICATION); + + appendStringInfo(buf, "ALTER PUBLICATION %s OWNER TO %s;", + quote_identifier(strVal(stmt->object)), + RoleSpecString(stmt->newowner, true)); +} + + +/* + * AppendPublicationOptions appends a string representing a list of publication opions. 
+ */ +static void +AppendPublicationOptions(StringInfo stringBuffer, List *optionList) +{ + ListCell *optionCell = NULL; + bool firstOptionPrinted = false; + + foreach(optionCell, optionList) + { + DefElem *option = (DefElem *) lfirst(optionCell); + char *optionName = option->defname; + char *optionValue = defGetString(option); + NodeTag valueType = nodeTag(option->arg); + + if (firstOptionPrinted) + { + appendStringInfo(stringBuffer, ", "); + } + firstOptionPrinted = true; + + appendStringInfo(stringBuffer, "%s = ", + quote_identifier(optionName)); + +#if (PG_VERSION_NUM >= PG_VERSION_15) + if (valueType == T_Integer || valueType == T_Float || valueType == T_Boolean) +#else + if (valueType == T_Integer || valueType == T_Float) +#endif + { + /* string escaping is unnecessary for numeric types and can cause issues */ + appendStringInfo(stringBuffer, "%s", optionValue); + } + else + { + appendStringInfo(stringBuffer, "%s", quote_literal_cstr(optionValue)); + } + } +} + + +/* + * AppendIdentifierList appends a string representing a list of + * identifiers (of String type). + */ +static void +AppendIdentifierList(StringInfo buf, List *objects) +{ + ListCell *objectCell = NULL; + + foreach(objectCell, objects) + { + char *name = strVal(lfirst(objectCell)); + + if (objectCell != list_head(objects)) + { + appendStringInfo(buf, ", "); + } + + appendStringInfoString(buf, quote_identifier(name)); + } +} diff --git a/src/backend/distributed/deparser/qualify_publication_stmt.c b/src/backend/distributed/deparser/qualify_publication_stmt.c new file mode 100644 index 000000000..3231fe363 --- /dev/null +++ b/src/backend/distributed/deparser/qualify_publication_stmt.c @@ -0,0 +1,119 @@ +/*------------------------------------------------------------------------- + * + * qualify_publication_stmt.c + * Functions specialized in fully qualifying all publication statements. These + * functions are dispatched from qualify.c + * + * Copyright (c), Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/namespace.h" +#include "distributed/deparser.h" +#include "distributed/listutils.h" +#include "nodes/nodes.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" + +#if (PG_VERSION_NUM >= PG_VERSION_15) +static void QualifyPublicationObjects(List *publicationObjects); +#else +static void QualifyTables(List *tables); +#endif +static void QualifyPublicationRangeVar(RangeVar *publication); + + +/* + * QualifyCreatePublicationStmt quailifies the publication names of the + * CREATE PUBLICATION statement. + */ +void +QualifyCreatePublicationStmt(Node *node) +{ + CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node); + +#if (PG_VERSION_NUM >= PG_VERSION_15) + QualifyPublicationObjects(stmt->pubobjects); +#else + QualifyTables(stmt->tables); +#endif +} + + +#if (PG_VERSION_NUM >= PG_VERSION_15) + +/* + * QualifyPublicationObjects ensures all table names in a list of + * publication objects are fully qualified. + */ +static void +QualifyPublicationObjects(List *publicationObjects) +{ + PublicationObjSpec *publicationObject = NULL; + + foreach_ptr(publicationObject, publicationObjects) + { + if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE) + { + /* FOR TABLE ... */ + PublicationTable *publicationTable = publicationObject->pubtable; + + QualifyPublicationRangeVar(publicationTable->relation); + } + } +} + + +#else + +/* + * QualifyTables ensures all table names in a list are fully qualified. 
+ */
+static void
+QualifyTables(List *tables)
+{
+	RangeVar *rangeVar = NULL;
+
+	foreach_ptr(rangeVar, tables)
+	{
+		QualifyPublicationRangeVar(rangeVar);
+	}
+}
+
+
+#endif
+
+
+/*
+ * QualifyAlterPublicationStmt ensures all table names in the given
+ * ALTER PUBLICATION statement are fully qualified.
+ */
+void
+QualifyAlterPublicationStmt(Node *node)
+{
+	AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
+
+#if (PG_VERSION_NUM >= PG_VERSION_15)
+	QualifyPublicationObjects(stmt->pubobjects);
+#else
+	QualifyTables(stmt->tables);
+#endif
+}
+
+
+/*
+ * QualifyPublicationRangeVar qualifies the given publication RangeVar if it is not qualified.
+ */
+static void
+QualifyPublicationRangeVar(RangeVar *publication)
+{
+	if (publication->schemaname == NULL)
+	{
+		Oid publicationOid = RelnameGetRelid(publication->relname);
+		Oid schemaOid = get_rel_namespace(publicationOid);
+		publication->schemaname = get_namespace_name(schemaOid);
+	}
+}
diff --git a/src/backend/distributed/deparser/ruleutils_15.c b/src/backend/distributed/deparser/ruleutils_15.c
index 6dabacd49..827492d87 100644
--- a/src/backend/distributed/deparser/ruleutils_15.c
+++ b/src/backend/distributed/deparser/ruleutils_15.c
@@ -53,6 +53,7 @@
 #include "common/keywords.h"
 #include "distributed/citus_nodefuncs.h"
 #include "distributed/citus_ruleutils.h"
+#include "distributed/multi_router_planner.h"
 #include "executor/spi.h"
 #include "foreign/foreign.h"
 #include "funcapi.h"
@@ -3723,7 +3724,6 @@ static void
 get_merge_query_def(Query *query, deparse_context *context)
 {
 	StringInfo buf = context->buf;
-	RangeTblEntry *targetRte;

 	/* Insert the WITH clause if given */
 	get_with_clause(query, context);
@@ -3731,7 +3731,7 @@ get_merge_query_def(Query *query, deparse_context *context)
 	/*
 	 * Start the query with MERGE INTO
 	 */
-	targetRte = rt_fetch(query->resultRelation, query->rtable);
+	RangeTblEntry *targetRte = ExtractResultRelationRTE(query);

 	if (PRETTY_INDENT(context))
 	{
@@ -3853,6 +3853,15 @@ get_merge_query_def(Query *query, deparse_context *context)
 		}
 	}

+	/*
+	 * RETURNING is not supported in MERGE, so it must be NULL, but if PG adds it later,
+	 * we might miss it; let's raise an exception to investigate.
+ */ + if (unlikely(query->returningList)) + { + elog(ERROR, "Unexpected RETURNING clause in MERGE"); + } + ereport(DEBUG1, (errmsg("", buf->data))); } diff --git a/src/backend/distributed/executor/adaptive_executor.c b/src/backend/distributed/executor/adaptive_executor.c index 8369878b7..4bb2d5e57 100644 --- a/src/backend/distributed/executor/adaptive_executor.c +++ b/src/backend/distributed/executor/adaptive_executor.c @@ -141,6 +141,7 @@ #include "distributed/connection_management.h" #include "distributed/commands/multi_copy.h" #include "distributed/deparse_shard_query.h" +#include "distributed/executor_util.h" #include "distributed/shared_connection_stats.h" #include "distributed/distributed_execution_locks.h" #include "distributed/intermediate_result_pruning.h" @@ -655,14 +656,7 @@ static void SequentialRunDistributedExecution(DistributedExecution *execution); static void FinishDistributedExecution(DistributedExecution *execution); static void CleanUpSessions(DistributedExecution *execution); -static void LockPartitionsForDistributedPlan(DistributedPlan *distributedPlan); -static void AcquireExecutorShardLocksForExecution(DistributedExecution *execution); -static bool ModifiedTableReplicated(List *taskList); static bool DistributedExecutionModifiesDatabase(DistributedExecution *execution); -static bool TaskListModifiesDatabase(RowModifyLevel modLevel, List *taskList); -static bool DistributedExecutionRequiresRollback(List *taskList); -static bool TaskListRequires2PC(List *taskList); -static bool SelectForUpdateOnReferenceTable(List *taskList); static void AssignTasksToConnectionsOrWorkerPool(DistributedExecution *execution); static void UnclaimAllSessionConnections(List *sessionList); static PlacementExecutionOrder ExecutionOrderForTask(RowModifyLevel modLevel, Task *task); @@ -718,10 +712,6 @@ static bool CanFailoverPlacementExecutionToLocalExecution(TaskPlacementExecution static void PlacementExecutionReady(TaskPlacementExecution *placementExecution); static TaskExecutionState TaskExecutionStateMachine(ShardCommandExecution * shardCommandExecution); -static bool HasDependentJobs(Job *mainJob); -static void ExtractParametersForRemoteExecution(ParamListInfo paramListInfo, - Oid **parameterTypes, - const char ***parameterValues); static int GetEventSetSize(List *sessionList); static bool ProcessSessionsWithFailedWaitEventSetOperations( DistributedExecution *execution); @@ -737,14 +727,10 @@ static void ProcessWaitEventsForSocketClosed(WaitEvent *events, int eventCount); #endif static long MillisecondsBetweenTimestamps(instr_time startTime, instr_time endTime); static uint64 MicrosecondsBetweenTimestamps(instr_time startTime, instr_time endTime); -static HeapTuple BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values); -static AttInMetadata * TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc); static int WorkerPoolCompare(const void *lhsKey, const void *rhsKey); static void SetAttributeInputMetadata(DistributedExecution *execution, ShardCommandExecution *shardCommandExecution); -static void LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName, - int *nodePort); -static bool IsDummyPlacement(ShardPlacement *taskPlacement); + /* * AdaptiveExecutorPreExecutorRun gets called right before postgres starts its executor @@ -830,7 +816,7 @@ AdaptiveExecutor(CitusScanState *scanState) paramListInfo); } - bool hasDependentJobs = HasDependentJobs(job); + bool hasDependentJobs = job->dependentJobList != NIL; if (hasDependentJobs) { /* jobs use intermediate 
results, which require a distributed transaction */ @@ -915,17 +901,6 @@ AdaptiveExecutor(CitusScanState *scanState) } -/* - * HasDependentJobs returns true if there is any dependent job - * for the mainjob(top level) job. - */ -static bool -HasDependentJobs(Job *mainJob) -{ - return list_length(mainJob->dependentJobList) > 0; -} - - /* * RunLocalExecution runs the localTaskList in the execution, fills the tuplestore * and sets the es_processed if necessary. @@ -1268,7 +1243,7 @@ DecideTransactionPropertiesForTaskList(RowModifyLevel modLevel, List *taskList, return xactProperties; } - if (DistributedExecutionRequiresRollback(taskList)) + if (TaskListRequiresRollback(taskList)) { /* transaction blocks are required if the task list needs to roll back */ xactProperties.useRemoteTransactionBlocks = TRANSACTION_BLOCKS_REQUIRED; @@ -1328,8 +1303,11 @@ StartDistributedExecution(DistributedExecution *execution) * to the first worker in a transaction block, which activates a coordinated * transaction. We need to do this before determining whether the execution * should use transaction blocks (see below). + * + * We acquire the locks for both the remote and local tasks. */ - AcquireExecutorShardLocksForExecution(execution); + AcquireExecutorShardLocksForExecution(execution->modLevel, + execution->remoteAndLocalTaskList); /* * We should not record parallel access if the target pool size is less than 2. @@ -1372,546 +1350,6 @@ DistributedExecutionModifiesDatabase(DistributedExecution *execution) } -/* - * DistributedPlanModifiesDatabase returns true if the plan modifies the data - * or the schema. - */ -bool -DistributedPlanModifiesDatabase(DistributedPlan *plan) -{ - return TaskListModifiesDatabase(plan->modLevel, plan->workerJob->taskList); -} - - -/* - * TaskListModifiesDatabase is a helper function for DistributedExecutionModifiesDatabase and - * DistributedPlanModifiesDatabase. - */ -static bool -TaskListModifiesDatabase(RowModifyLevel modLevel, List *taskList) -{ - if (modLevel > ROW_MODIFY_READONLY) - { - return true; - } - - /* - * If we cannot decide by only checking the row modify level, - * we should look closer to the tasks. - */ - if (list_length(taskList) < 1) - { - /* is this ever possible? */ - return false; - } - - Task *firstTask = (Task *) linitial(taskList); - - return !ReadOnlyTask(firstTask->taskType); -} - - -/* - * DistributedExecutionRequiresRollback returns true if the distributed - * execution should start a CoordinatedTransaction. In other words, if the - * function returns true, the execution sends BEGIN; to every connection - * involved in the distributed execution. - */ -static bool -DistributedExecutionRequiresRollback(List *taskList) -{ - int taskCount = list_length(taskList); - - if (taskCount == 0) - { - return false; - } - - Task *task = (Task *) linitial(taskList); - if (task->cannotBeExecutedInTransction) - { - /* vacuum, create index concurrently etc. */ - return false; - } - - bool selectForUpdate = task->relationRowLockList != NIL; - if (selectForUpdate) - { - /* - * Do not check SelectOpensTransactionBlock, always open transaction block - * if SELECT FOR UPDATE is executed inside a distributed transaction. 
- */ - return IsMultiStatementTransaction(); - } - - if (ReadOnlyTask(task->taskType)) - { - return SelectOpensTransactionBlock && - IsTransactionBlock(); - } - - if (IsMultiStatementTransaction()) - { - return true; - } - - if (list_length(taskList) > 1) - { - return true; - } - - if (list_length(task->taskPlacementList) > 1) - { - /* - * Single DML/DDL tasks with replicated tables (including - * reference and non-reference tables) should require - * BEGIN/COMMIT/ROLLBACK. - */ - return true; - } - - if (task->queryCount > 1) - { - /* - * When there are multiple sequential queries in a task - * we need to run those as a transaction. - */ - return true; - } - - return false; -} - - -/* - * TaskListRequires2PC determines whether the given task list requires 2PC. - */ -static bool -TaskListRequires2PC(List *taskList) -{ - if (taskList == NIL) - { - return false; - } - - Task *task = (Task *) linitial(taskList); - if (ReadOnlyTask(task->taskType)) - { - /* we do not trigger 2PC for ReadOnly queries */ - return false; - } - - bool singleTask = list_length(taskList) == 1; - if (singleTask && list_length(task->taskPlacementList) == 1) - { - /* we do not trigger 2PC for modifications that are: - * - single task - * - single placement - */ - return false; - } - - /* - * Otherwise, all modifications are done via 2PC. This includes: - * - Multi-shard commands irrespective of the replication factor - * - Single-shard commands that are targeting more than one replica - */ - return true; -} - - -/* - * ReadOnlyTask returns true if the input task does a read-only operation - * on the database. - */ -bool -ReadOnlyTask(TaskType taskType) -{ - switch (taskType) - { - case READ_TASK: - case MAP_OUTPUT_FETCH_TASK: - case MAP_TASK: - case MERGE_TASK: - { - return true; - } - - default: - { - return false; - } - } -} - - -/* - * TaskListCannotBeExecutedInTransaction returns true if any of the - * tasks in the input cannot be executed in a transaction. These are - * tasks like VACUUM or CREATE INDEX CONCURRENTLY etc. - */ -bool -TaskListCannotBeExecutedInTransaction(List *taskList) -{ - Task *task = NULL; - foreach_ptr(task, taskList) - { - if (task->cannotBeExecutedInTransction) - { - return true; - } - } - - return false; -} - - -/* - * SelectForUpdateOnReferenceTable returns true if the input task - * contains a FOR UPDATE clause that locks any reference tables. - */ -static bool -SelectForUpdateOnReferenceTable(List *taskList) -{ - if (list_length(taskList) != 1) - { - /* we currently do not support SELECT FOR UPDATE on multi task queries */ - return false; - } - - Task *task = (Task *) linitial(taskList); - RelationRowLock *relationRowLock = NULL; - foreach_ptr(relationRowLock, task->relationRowLockList) - { - Oid relationId = relationRowLock->relationId; - - if (IsCitusTableType(relationId, REFERENCE_TABLE)) - { - return true; - } - } - - return false; -} - - -/* - * LockPartitionsForDistributedPlan ensures commands take locks on all partitions - * of a distributed table that appears in the query. We do this primarily out of - * consistency with PostgreSQL locking. - */ -static void -LockPartitionsForDistributedPlan(DistributedPlan *distributedPlan) -{ - if (DistributedPlanModifiesDatabase(distributedPlan)) - { - Oid targetRelationId = distributedPlan->targetRelationId; - - LockPartitionsInRelationList(list_make1_oid(targetRelationId), RowExclusiveLock); - } - - /* - * Lock partitions of tables that appear in a SELECT or subquery. 
In the - * DML case this also includes the target relation, but since we already - * have a stronger lock this doesn't do any harm. - */ - LockPartitionsInRelationList(distributedPlan->relationIdList, AccessShareLock); -} - - -/* - * AcquireExecutorShardLocksForExecution acquires advisory lock on shard IDs - * to prevent unsafe concurrent modifications of shards. - * - * We prevent concurrent modifications of shards in two cases: - * 1. Any non-commutative writes to a replicated table - * 2. Multi-shard writes that are executed in parallel - * - * The first case ensures we do not apply updates in different orders on - * different replicas (e.g. of a reference table), which could lead the - * replicas to diverge. - * - * The second case prevents deadlocks due to out-of-order execution. - * - * There are two GUCs that can override the default behaviors. - * 'citus.all_modifications_commutative' relaxes locking - * that's done for the purpose of keeping replicas consistent. - * 'citus.enable_deadlock_prevention' relaxes locking done for - * the purpose of avoiding deadlocks between concurrent - * multi-shard commands. - * - * We do not take executor shard locks for utility commands such as - * TRUNCATE because the table locks already prevent concurrent access. - */ -static void -AcquireExecutorShardLocksForExecution(DistributedExecution *execution) -{ - RowModifyLevel modLevel = execution->modLevel; - - /* acquire the locks for both the remote and local tasks */ - List *taskList = execution->remoteAndLocalTaskList; - - if (modLevel <= ROW_MODIFY_READONLY && - !SelectForUpdateOnReferenceTable(taskList)) - { - /* - * Executor locks only apply to DML commands and SELECT FOR UPDATE queries - * touching reference tables. - */ - return; - } - - bool requiresParallelExecutionLocks = - !(list_length(taskList) == 1 || ShouldRunTasksSequentially(taskList)); - - bool modifiedTableReplicated = ModifiedTableReplicated(taskList); - if (!modifiedTableReplicated && !requiresParallelExecutionLocks) - { - /* - * When a distributed query on tables with replication - * factor == 1 and command hits only a single shard, we - * rely on Postgres to handle the serialization of the - * concurrent modifications on the workers. - * - * For reference tables, even if their placements are replicated - * ones (e.g., single node), we acquire the distributed execution - * locks to be consistent when new node(s) are added. So, they - * do not return at this point. - */ - return; - } - - /* - * We first assume that all the remaining modifications are going to - * be serialized. So, start with an ExclusiveLock and lower the lock level - * as much as possible. - */ - int lockMode = ExclusiveLock; - - /* - * In addition to honouring commutativity rules, we currently only - * allow a single multi-shard command on a shard at a time. Otherwise, - * concurrent multi-shard commands may take row-level locks on the - * shard placements in a different order and create a distributed - * deadlock. This applies even when writes are commutative and/or - * there is no replication. This can be relaxed via - * EnableDeadlockPrevention. - * - * 1. If citus.all_modifications_commutative is set to true, then all locks - * are acquired as RowExclusiveLock. - * - * 2. If citus.all_modifications_commutative is false, then only the shards - * with more than one replicas are locked with ExclusiveLock. Otherwise, the - * lock is acquired with ShareUpdateExclusiveLock. 
- * - * ShareUpdateExclusiveLock conflicts with itself such that only one - * multi-shard modification at a time is allowed on a shard. It also conflicts - * with ExclusiveLock, which ensures that updates/deletes/upserts are applied - * in the same order on all placements. It does not conflict with - * RowExclusiveLock, which is normally obtained by single-shard, commutative - * writes. - */ - if (!modifiedTableReplicated && requiresParallelExecutionLocks) - { - /* - * When there is no replication then we only need to prevent - * concurrent multi-shard commands on the same shards. This is - * because concurrent, parallel commands may modify the same - * set of shards, but in different orders. The order of the - * accesses might trigger distributed deadlocks that are not - * possible to happen on non-distributed systems such - * regular Postgres. - * - * As an example, assume that we have two queries: query-1 and query-2. - * Both queries access shard-1 and shard-2. If query-1 first accesses to - * shard-1 then shard-2, and query-2 accesses shard-2 then shard-1, these - * two commands might block each other in case they modify the same rows - * (e.g., cause distributed deadlocks). - * - * In either case, ShareUpdateExclusive has the desired effect, since - * it conflicts with itself and ExclusiveLock (taken by non-commutative - * writes). - * - * However, some users find this too restrictive, so we allow them to - * reduce to a RowExclusiveLock when citus.enable_deadlock_prevention - * is enabled, which lets multi-shard modifications run in parallel as - * long as they all disable the GUC. - */ - lockMode = - EnableDeadlockPrevention ? ShareUpdateExclusiveLock : RowExclusiveLock; - - if (!IsCoordinator()) - { - /* - * We also skip taking a heavy-weight lock when running a multi-shard - * commands from workers, since we currently do not prevent concurrency - * across workers anyway. - */ - lockMode = RowExclusiveLock; - } - } - else if (modifiedTableReplicated) - { - /* - * When we are executing distributed queries on replicated tables, our - * default behaviour is to prevent any concurrency. This is valid - * for when parallel execution is happening or not. - * - * The reason is that we cannot control the order of the placement accesses - * of two distributed queries to the same shards. The order of the accesses - * might cause the replicas of the same shard placements diverge. This is - * not possible to happen on non-distributed systems such regular Postgres. - * - * As an example, assume that we have two queries: query-1 and query-2. - * Both queries only access the placements of shard-1, say p-1 and p-2. - * - * And, assume that these queries are non-commutative, such as: - * query-1: UPDATE table SET b = 1 WHERE key = 1; - * query-2: UPDATE table SET b = 2 WHERE key = 1; - * - * If query-1 accesses to p-1 then p-2, and query-2 accesses - * p-2 then p-1, these two commands would leave the p-1 and p-2 - * diverged (e.g., the values for the column "b" would be different). - * - * The only exception to this rule is the single shard commutative - * modifications, such as INSERTs. In that case, we can allow - * concurrency among such backends, hence lowering the lock level - * to RowExclusiveLock. - */ - if (!requiresParallelExecutionLocks && modLevel < ROW_MODIFY_NONCOMMUTATIVE) - { - lockMode = RowExclusiveLock; - } - } - - if (AllModificationsCommutative) - { - /* - * The mapping is overridden when all_modifications_commutative is set to true. 
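The lock level this function (removed here, re-added in distributed_execution_locks.c below) ends up taking can be summarized as a small pure decision over a handful of booleans. A standalone sketch of that decision follows; the enum values are assumed to mirror PostgreSQL's RowExclusiveLock (3), ShareUpdateExclusiveLock (4) and ExclusiveLock (7), and all Sketch-prefixed names are illustrative, not the Citus API.

#include <stdbool.h>

typedef enum SketchLockMode
{
	SKETCH_ROW_EXCLUSIVE = 3,          /* RowExclusiveLock */
	SKETCH_SHARE_UPDATE_EXCLUSIVE = 4, /* ShareUpdateExclusiveLock */
	SKETCH_EXCLUSIVE = 7               /* ExclusiveLock */
} SketchLockMode;

static SketchLockMode
SketchExecutorShardLockMode(bool modifiedTableReplicated,
							bool requiresParallelExecutionLocks,
							bool allModificationsCommutative,
							bool enableDeadlockPrevention,
							bool runningOnCoordinator,
							bool singleShardCommutativeWrite)
{
	/*
	 * Assumes the early-return cases above (read-only without FOR UPDATE on a
	 * reference table, and non-parallel writes on non-replicated tables) were
	 * already filtered out. Start strict and relax only where the rules allow.
	 */
	SketchLockMode lockMode = SKETCH_EXCLUSIVE;

	if (!modifiedTableReplicated && requiresParallelExecutionLocks)
	{
		lockMode = enableDeadlockPrevention ? SKETCH_SHARE_UPDATE_EXCLUSIVE
											: SKETCH_ROW_EXCLUSIVE;

		if (!runningOnCoordinator)
		{
			/* no cross-worker concurrency control, so skip the heavy lock */
			lockMode = SKETCH_ROW_EXCLUSIVE;
		}
	}
	else if (modifiedTableReplicated &&
			 !requiresParallelExecutionLocks && singleShardCommutativeWrite)
	{
		/* single-shard commutative write on a replicated table */
		lockMode = SKETCH_ROW_EXCLUSIVE;
	}

	if (allModificationsCommutative)
	{
		/* citus.all_modifications_commutative overrides everything above */
		lockMode = SKETCH_ROW_EXCLUSIVE;
	}

	return lockMode;
}

Starting from ExclusiveLock and only lowering it keeps the default safe: a missed branch leaves an execution over-locked rather than under-locked.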
- * In that case, all modifications are treated as commutative, which can be used - * to communicate that the application is only generating commutative - * UPDATE/DELETE/UPSERT commands and exclusive locks are unnecessary. This - * is irrespective of single-shard/multi-shard or replicated tables. - */ - lockMode = RowExclusiveLock; - } - - /* now, iterate on the tasks and acquire the executor locks on the shards */ - List *anchorShardIntervalList = NIL; - List *relationRowLockList = NIL; - List *requiresConsistentSnapshotRelationShardList = NIL; - - Task *task = NULL; - foreach_ptr(task, taskList) - { - ShardInterval *anchorShardInterval = LoadShardInterval(task->anchorShardId); - anchorShardIntervalList = lappend(anchorShardIntervalList, anchorShardInterval); - - /* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */ - AcquireExecutorShardLocksForRelationRowLockList(task->relationRowLockList); - - relationRowLockList = - list_concat(relationRowLockList, - task->relationRowLockList); - - /* - * If the task has a subselect, then we may need to lock the shards from which - * the query selects as well to prevent the subselects from seeing different - * results on different replicas. - */ - if (RequiresConsistentSnapshot(task)) - { - /* - * ExclusiveLock conflicts with all lock types used by modifications - * and therefore prevents other modifications from running - * concurrently. - */ - requiresConsistentSnapshotRelationShardList = - list_concat(requiresConsistentSnapshotRelationShardList, - task->relationShardList); - } - } - - /* - * Acquire the locks in a sorted way to avoid deadlocks due to lock - * ordering across concurrent sessions. - */ - anchorShardIntervalList = - SortList(anchorShardIntervalList, CompareShardIntervalsById); - - /* - * If we are dealing with a partition we are also taking locks on parent table - * to prevent deadlocks on concurrent operations on a partition and its parent. - * - * Note that this function currently does not acquire any remote locks as that - * is necessary to control the concurrency across multiple nodes for replicated - * tables. That is because Citus currently does not allow modifications to - * partitions from any node other than the coordinator. - */ - LockParentShardResourceIfPartition(anchorShardIntervalList, lockMode); - - /* Acquire distribution execution locks on the affected shards */ - SerializeNonCommutativeWrites(anchorShardIntervalList, lockMode); - - if (relationRowLockList != NIL) - { - /* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */ - AcquireExecutorShardLocksForRelationRowLockList(relationRowLockList); - } - - - if (requiresConsistentSnapshotRelationShardList != NIL) - { - /* - * If the task has a subselect, then we may need to lock the shards from which - * the query selects as well to prevent the subselects from seeing different - * results on different replicas. - * - * ExclusiveLock conflicts with all lock types used by modifications - * and therefore prevents other modifications from running - * concurrently. - */ - LockRelationShardResources(requiresConsistentSnapshotRelationShardList, - ExclusiveLock); - } -} - - -/* - * ModifiedTableReplicated iterates on the task list and returns true - * if any of the tasks' anchor shard is a replicated table. We qualify - * replicated tables as any reference table or any distributed table with - * replication factor > 1. 
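Concretely, the predicate described here treats the task list as replicated as soon as any valid anchor shard belongs to a reference table or to a table that keeps more than one placement per shard. A standalone sketch over a plain array, with a simplified shard descriptor standing in for the ReferenceTableShardId and SingleReplicatedTable catalog lookups; all names are illustrative.

#include <stdbool.h>
#include <stddef.h>

typedef struct SketchAnchorShard
{
	bool valid;              /* false when the task has no anchor shard */
	bool isReferenceTable;
	int placementCount;      /* placements of the shard's table */
} SketchAnchorShard;

static bool
SketchModifiedTableReplicated(const SketchAnchorShard *shards, size_t shardCount)
{
	for (size_t i = 0; i < shardCount; i++)
	{
		if (!shards[i].valid)
		{
			/* INVALID_SHARD_ID in the real code */
			continue;
		}

		if (shards[i].isReferenceTable || shards[i].placementCount > 1)
		{
			return true;
		}
	}

	return false;
}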
- */ -static bool -ModifiedTableReplicated(List *taskList) -{ - Task *task = NULL; - foreach_ptr(task, taskList) - { - int64 shardId = task->anchorShardId; - - if (shardId == INVALID_SHARD_ID) - { - continue; - } - - if (ReferenceTableShardId(shardId)) - { - return true; - } - - Oid relationId = RelationIdForShard(shardId); - if (!SingleReplicatedTable(relationId)) - { - return true; - } - } - - return false; -} - - /* * FinishDistributedExecution cleans up resources associated with a * distributed execution. @@ -1927,111 +1365,6 @@ FinishDistributedExecution(DistributedExecution *execution) } -/* - * CleanUpSessions does any clean-up necessary for the session used - * during the execution. We only reach the function after successfully - * completing all the tasks and we expect no tasks are still in progress. - */ -static void -CleanUpSessions(DistributedExecution *execution) -{ - List *sessionList = execution->sessionList; - - /* we get to this function only after successful executions */ - Assert(!execution->failed && execution->unfinishedTaskCount == 0); - - /* always trigger wait event set in the first round */ - WorkerSession *session = NULL; - foreach_ptr(session, sessionList) - { - MultiConnection *connection = session->connection; - - ereport(DEBUG4, (errmsg("Total number of commands sent over the session %ld: %ld " - "to node %s:%d", session->sessionId, - session->commandsSent, - connection->hostname, connection->port))); - - UnclaimConnection(connection); - - if (connection->connectionState == MULTI_CONNECTION_CONNECTING || - connection->connectionState == MULTI_CONNECTION_FAILED || - connection->connectionState == MULTI_CONNECTION_LOST || - connection->connectionState == MULTI_CONNECTION_TIMED_OUT) - { - /* - * We want the MultiConnection go away and not used in - * the subsequent executions. - * - * We cannot get MULTI_CONNECTION_LOST via the ConnectionStateMachine, - * but we might get it via the connection API and find us here before - * changing any states in the ConnectionStateMachine. - * - */ - CloseConnection(connection); - } - else if (connection->connectionState == MULTI_CONNECTION_CONNECTED) - { - RemoteTransaction *transaction = &(connection->remoteTransaction); - RemoteTransactionState transactionState = transaction->transactionState; - - if (transactionState == REMOTE_TRANS_CLEARING_RESULTS) - { - /* - * We might have established the connection, and even sent BEGIN, but not - * get to the point where we assigned a task to this specific connection - * (because other connections in the pool already finished all the tasks). - */ - Assert(session->commandsSent == 0); - - ClearResults(connection, false); - } - else if (!(transactionState == REMOTE_TRANS_NOT_STARTED || - transactionState == REMOTE_TRANS_STARTED)) - { - /* - * We don't have to handle anything else. Note that the execution - * could only finish on connectionStates of MULTI_CONNECTION_CONNECTING, - * MULTI_CONNECTION_FAILED and MULTI_CONNECTION_CONNECTED. The first two - * are already handled above. - * - * When we're on MULTI_CONNECTION_CONNECTED, TransactionStateMachine - * ensures that all the necessary commands are successfully sent over - * the connection and everything is cleared up. Otherwise, we'd have been - * on MULTI_CONNECTION_FAILED state. 
- */ - ereport(WARNING, (errmsg("unexpected transaction state at the end of " - "execution: %d", transactionState))); - } - - /* get ready for the next executions if we need use the same connection */ - connection->waitFlags = WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE; - } - else - { - ereport(WARNING, (errmsg("unexpected connection state at the end of " - "execution: %d", connection->connectionState))); - } - } -} - - -/* - * UnclaimAllSessionConnections unclaims all of the connections for the given - * sessionList. - */ -static void -UnclaimAllSessionConnections(List *sessionList) -{ - WorkerSession *session = NULL; - foreach_ptr(session, sessionList) - { - MultiConnection *connection = session->connection; - - UnclaimConnection(connection); - } -} - - /* * AssignTasksToConnectionsOrWorkerPool goes through the list of tasks to determine whether any * task placements need to be assigned to particular connections because of preceding @@ -2222,48 +1555,6 @@ AssignTasksToConnectionsOrWorkerPool(DistributedExecution *execution) } -/* - * LookupTaskPlacementHostAndPort sets the nodename and nodeport for the given task placement - * with a lookup. - */ -static void -LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName, - int *nodePort) -{ - if (IsDummyPlacement(taskPlacement)) - { - /* - * If we create a dummy placement for the local node, it is possible - * that the entry doesn't exist in pg_dist_node, hence a lookup will fail. - * In that case we want to use the dummy placements values. - */ - *nodeName = taskPlacement->nodeName; - *nodePort = taskPlacement->nodePort; - } - else - { - /* - * We want to lookup the node information again since it is possible that - * there were changes in pg_dist_node and we will get those invalidations - * in LookupNodeForGroup. - */ - WorkerNode *workerNode = LookupNodeForGroup(taskPlacement->groupId); - *nodeName = workerNode->workerName; - *nodePort = workerNode->workerPort; - } -} - - -/* - * IsDummyPlacement returns true if the given placement is a dummy placement. - */ -static bool -IsDummyPlacement(ShardPlacement *taskPlacement) -{ - return taskPlacement->nodeId == LOCAL_NODE_ID; -} - - /* * WorkerPoolCompare is based on WorkerNodeCompare function. The function * compares two worker nodes by their host name and port number. @@ -2510,40 +1801,6 @@ RemoteSocketClosedForAnySession(DistributedExecution *execution) #endif -/* - * ShouldRunTasksSequentially returns true if each of the individual tasks - * should be executed one by one. Note that this is different than - * MultiShardConnectionType == SEQUENTIAL_CONNECTION case. In that case, - * running the tasks across the nodes in parallel is acceptable and implemented - * in that way. - * - * However, the executions that are qualified here would perform poorly if the - * tasks across the workers are executed in parallel. We currently qualify only - * one class of distributed queries here, multi-row INSERTs. If we do not enforce - * true sequential execution, concurrent multi-row upserts could easily form - * a distributed deadlock when the upserts touch the same rows. 
- */ -bool -ShouldRunTasksSequentially(List *taskList) -{ - if (list_length(taskList) < 2) - { - /* single task plans are already qualified as sequential by definition */ - return false; - } - - /* all the tasks are the same, so we only look one */ - Task *initialTask = (Task *) linitial(taskList); - if (initialTask->rowValuesLists != NIL) - { - /* found a multi-row INSERT */ - return true; - } - - return false; -} - - /* * SequentialRunDistributedExecution gets a distributed execution and * executes each individual task in the execution sequentially, one @@ -4777,6 +4034,7 @@ ReceiveResults(WorkerSession *session, bool storeRows) TupleDesc tupleDescriptor = tupleDest->tupleDescForQuery(tupleDest, queryIndex); if (tupleDescriptor == NULL) { + PQclear(result); continue; } @@ -4908,120 +4166,6 @@ ReceiveResults(WorkerSession *session, bool storeRows) } -/* - * TupleDescGetAttBinaryInMetadata - Build an AttInMetadata structure based on - * the supplied TupleDesc. AttInMetadata can be used in conjunction with - * fmStringInfos containing binary encoded types to produce a properly formed - * tuple. - * - * NOTE: This function is a copy of the PG function TupleDescGetAttInMetadata, - * except that it uses getTypeBinaryInputInfo instead of getTypeInputInfo. - */ -static AttInMetadata * -TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc) -{ - int natts = tupdesc->natts; - int i; - Oid atttypeid; - Oid attinfuncid; - - AttInMetadata *attinmeta = (AttInMetadata *) palloc(sizeof(AttInMetadata)); - - /* "Bless" the tupledesc so that we can make rowtype datums with it */ - attinmeta->tupdesc = BlessTupleDesc(tupdesc); - - /* - * Gather info needed later to call the "in" function for each attribute - */ - FmgrInfo *attinfuncinfo = (FmgrInfo *) palloc0(natts * sizeof(FmgrInfo)); - Oid *attioparams = (Oid *) palloc0(natts * sizeof(Oid)); - int32 *atttypmods = (int32 *) palloc0(natts * sizeof(int32)); - - for (i = 0; i < natts; i++) - { - Form_pg_attribute att = TupleDescAttr(tupdesc, i); - - /* Ignore dropped attributes */ - if (!att->attisdropped) - { - atttypeid = att->atttypid; - getTypeBinaryInputInfo(atttypeid, &attinfuncid, &attioparams[i]); - fmgr_info(attinfuncid, &attinfuncinfo[i]); - atttypmods[i] = att->atttypmod; - } - } - attinmeta->attinfuncs = attinfuncinfo; - attinmeta->attioparams = attioparams; - attinmeta->atttypmods = atttypmods; - - return attinmeta; -} - - -/* - * BuildTupleFromBytes - build a HeapTuple given user data in binary form. - * values is an array of StringInfos, one for each attribute of the return - * tuple. A NULL StringInfo pointer indicates we want to create a NULL field. - * - * NOTE: This function is a copy of the PG function BuildTupleFromCStrings, - * except that it uses ReceiveFunctionCall instead of InputFunctionCall. - */ -static HeapTuple -BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values) -{ - TupleDesc tupdesc = attinmeta->tupdesc; - int natts = tupdesc->natts; - int i; - - Datum *dvalues = (Datum *) palloc(natts * sizeof(Datum)); - bool *nulls = (bool *) palloc(natts * sizeof(bool)); - - /* - * Call the "in" function for each non-dropped attribute, even for nulls, - * to support domains. 
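The one-line PQclear() added to ReceiveResults above closes a small leak: every PGresult handed out by PQgetResult has to be freed, including the ones the loop decides to skip with continue. A standalone libpq illustration of the pattern, unrelated to the Citus connection layer; DrainResults is a hypothetical helper and only standard libpq calls are used.

#include <libpq-fe.h>

static void
DrainResults(PGconn *connection)
{
	PGresult *result = NULL;

	while ((result = PQgetResult(connection)) != NULL)
	{
		if (PQresultStatus(result) != PGRES_TUPLES_OK)
		{
			/* without this PQclear, every skipped result would leak */
			PQclear(result);
			continue;
		}

		/* ... consume PQntuples(result) rows here ... */

		PQclear(result);
	}
}

Skip paths are where such leaks typically hide, since the success path usually already carries the matching PQclear.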
- */ - for (i = 0; i < natts; i++) - { - if (!TupleDescAttr(tupdesc, i)->attisdropped) - { - /* Non-dropped attributes */ - dvalues[i] = ReceiveFunctionCall(&attinmeta->attinfuncs[i], - values[i], - attinmeta->attioparams[i], - attinmeta->atttypmods[i]); - if (values[i] != NULL) - { - nulls[i] = false; - } - else - { - nulls[i] = true; - } - } - else - { - /* Handle dropped attributes by setting to NULL */ - dvalues[i] = (Datum) 0; - nulls[i] = true; - } - } - - /* - * Form a tuple - */ - HeapTuple tuple = heap_form_tuple(tupdesc, dvalues, nulls); - - /* - * Release locally palloc'd space. XXX would probably be good to pfree - * values of pass-by-reference datums, as well. - */ - pfree(dvalues); - pfree(nulls); - - return tuple; -} - - /* * WorkerPoolFailed marks a worker pool and all the placement executions scheduled * on it as failed. @@ -5697,6 +4841,111 @@ RebuildWaitEventSetFlags(WaitEventSet *waitEventSet, List *sessionList) } +/* + * CleanUpSessions does any clean-up necessary for the session used + * during the execution. We only reach the function after successfully + * completing all the tasks and we expect no tasks are still in progress. + */ +static void +CleanUpSessions(DistributedExecution *execution) +{ + List *sessionList = execution->sessionList; + + /* we get to this function only after successful executions */ + Assert(!execution->failed && execution->unfinishedTaskCount == 0); + + /* always trigger wait event set in the first round */ + WorkerSession *session = NULL; + foreach_ptr(session, sessionList) + { + MultiConnection *connection = session->connection; + + ereport(DEBUG4, (errmsg("Total number of commands sent over the session %ld: %ld " + "to node %s:%d", session->sessionId, + session->commandsSent, + connection->hostname, connection->port))); + + UnclaimConnection(connection); + + if (connection->connectionState == MULTI_CONNECTION_CONNECTING || + connection->connectionState == MULTI_CONNECTION_FAILED || + connection->connectionState == MULTI_CONNECTION_LOST || + connection->connectionState == MULTI_CONNECTION_TIMED_OUT) + { + /* + * We want the MultiConnection go away and not used in + * the subsequent executions. + * + * We cannot get MULTI_CONNECTION_LOST via the ConnectionStateMachine, + * but we might get it via the connection API and find us here before + * changing any states in the ConnectionStateMachine. + * + */ + CloseConnection(connection); + } + else if (connection->connectionState == MULTI_CONNECTION_CONNECTED) + { + RemoteTransaction *transaction = &(connection->remoteTransaction); + RemoteTransactionState transactionState = transaction->transactionState; + + if (transactionState == REMOTE_TRANS_CLEARING_RESULTS) + { + /* + * We might have established the connection, and even sent BEGIN, but not + * get to the point where we assigned a task to this specific connection + * (because other connections in the pool already finished all the tasks). + */ + Assert(session->commandsSent == 0); + + ClearResults(connection, false); + } + else if (!(transactionState == REMOTE_TRANS_NOT_STARTED || + transactionState == REMOTE_TRANS_STARTED)) + { + /* + * We don't have to handle anything else. Note that the execution + * could only finish on connectionStates of MULTI_CONNECTION_CONNECTING, + * MULTI_CONNECTION_FAILED and MULTI_CONNECTION_CONNECTED. The first two + * are already handled above. 
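The clean-up in this function (matching the copy removed earlier in the file) is essentially a per-connection dispatch on connection state and remote transaction state. A standalone sketch of that dispatch, collapsed to a single action per connection for brevity; the enums and action names are simplified stand-ins for the MultiConnection and RemoteTransaction states, not the Citus definitions.

typedef enum SketchConnState
{
	SKETCH_CONN_CONNECTING,
	SKETCH_CONN_CONNECTED,
	SKETCH_CONN_FAILED,
	SKETCH_CONN_LOST,
	SKETCH_CONN_TIMED_OUT
} SketchConnState;

typedef enum SketchTxState
{
	SKETCH_TX_NOT_STARTED,
	SKETCH_TX_STARTED,
	SKETCH_TX_CLEARING_RESULTS,
	SKETCH_TX_OTHER
} SketchTxState;

typedef enum SketchCleanupAction
{
	SKETCH_CLOSE_CONNECTION,   /* unusable connection: close it */
	SKETCH_CLEAR_RESULTS,      /* BEGIN sent but no task assigned: drain results */
	SKETCH_KEEP_FOR_REUSE,     /* healthy: reset wait flags and reuse later */
	SKETCH_WARN_UNEXPECTED     /* anything else is worth a WARNING */
} SketchCleanupAction;

static SketchCleanupAction
SketchCleanUpSession(SketchConnState connState, SketchTxState txState)
{
	if (connState == SKETCH_CONN_CONNECTING || connState == SKETCH_CONN_FAILED ||
		connState == SKETCH_CONN_LOST || connState == SKETCH_CONN_TIMED_OUT)
	{
		return SKETCH_CLOSE_CONNECTION;
	}

	if (connState == SKETCH_CONN_CONNECTED)
	{
		if (txState == SKETCH_TX_CLEARING_RESULTS)
		{
			return SKETCH_CLEAR_RESULTS;
		}

		if (txState != SKETCH_TX_NOT_STARTED && txState != SKETCH_TX_STARTED)
		{
			return SKETCH_WARN_UNEXPECTED;
		}

		return SKETCH_KEEP_FOR_REUSE;
	}

	return SKETCH_WARN_UNEXPECTED;
}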
+ * + * When we're on MULTI_CONNECTION_CONNECTED, TransactionStateMachine + * ensures that all the necessary commands are successfully sent over + * the connection and everything is cleared up. Otherwise, we'd have been + * on MULTI_CONNECTION_FAILED state. + */ + ereport(WARNING, (errmsg("unexpected transaction state at the end of " + "execution: %d", transactionState))); + } + + /* get ready for the next executions if we need use the same connection */ + connection->waitFlags = WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE; + } + else + { + ereport(WARNING, (errmsg("unexpected connection state at the end of " + "execution: %d", connection->connectionState))); + } + } +} + + +/* + * UnclaimAllSessionConnections unclaims all of the connections for the given + * sessionList. + */ +static void +UnclaimAllSessionConnections(List *sessionList) +{ + WorkerSession *session = NULL; + foreach_ptr(session, sessionList) + { + MultiConnection *connection = session->connection; + + UnclaimConnection(connection); + } +} + + /* * SetLocalForceMaxQueryParallelization is simply a C interface for setting * the following: @@ -5709,89 +4958,3 @@ SetLocalForceMaxQueryParallelization(void) (superuser() ? PGC_SUSET : PGC_USERSET), PGC_S_SESSION, GUC_ACTION_LOCAL, true, 0, false); } - - -/* - * ExtractParametersForRemoteExecution extracts parameter types and values from - * the given ParamListInfo structure, and fills parameter type and value arrays. - * It changes oid of custom types to InvalidOid so that they are the same in workers - * and coordinators. - */ -static void -ExtractParametersForRemoteExecution(ParamListInfo paramListInfo, Oid **parameterTypes, - const char ***parameterValues) -{ - ExtractParametersFromParamList(paramListInfo, parameterTypes, - parameterValues, false); -} - - -/* - * ExtractParametersFromParamList extracts parameter types and values from - * the given ParamListInfo structure, and fills parameter type and value arrays. - * If useOriginalCustomTypeOids is true, it uses the original oids for custom types. - */ -void -ExtractParametersFromParamList(ParamListInfo paramListInfo, - Oid **parameterTypes, - const char ***parameterValues, bool - useOriginalCustomTypeOids) -{ - int parameterCount = paramListInfo->numParams; - - *parameterTypes = (Oid *) palloc0(parameterCount * sizeof(Oid)); - *parameterValues = (const char **) palloc0(parameterCount * sizeof(char *)); - - /* get parameter types and values */ - for (int parameterIndex = 0; parameterIndex < parameterCount; parameterIndex++) - { - ParamExternData *parameterData = ¶mListInfo->params[parameterIndex]; - Oid typeOutputFunctionId = InvalidOid; - bool variableLengthType = false; - - /* - * Use 0 for data types where the oid values can be different on - * the coordinator and worker nodes. Therefore, the worker nodes can - * infer the correct oid. - */ - if (parameterData->ptype >= FirstNormalObjectId && !useOriginalCustomTypeOids) - { - (*parameterTypes)[parameterIndex] = 0; - } - else - { - (*parameterTypes)[parameterIndex] = parameterData->ptype; - } - - /* - * If the parameter is not referenced / used (ptype == 0) and - * would otherwise have errored out inside standard_planner()), - * don't pass a value to the remote side, and pass text oid to prevent - * undetermined data type errors on workers. 
- */ - if (parameterData->ptype == 0) - { - (*parameterValues)[parameterIndex] = NULL; - (*parameterTypes)[parameterIndex] = TEXTOID; - - continue; - } - - /* - * If the parameter is NULL then we preserve its type, but - * don't need to evaluate its value. - */ - if (parameterData->isnull) - { - (*parameterValues)[parameterIndex] = NULL; - - continue; - } - - getTypeOutputInfo(parameterData->ptype, &typeOutputFunctionId, - &variableLengthType); - - (*parameterValues)[parameterIndex] = OidOutputFunctionCall(typeOutputFunctionId, - parameterData->value); - } -} diff --git a/src/backend/distributed/executor/distributed_execution_locks.c b/src/backend/distributed/executor/distributed_execution_locks.c index 27c6a961d..f7d2fd49d 100644 --- a/src/backend/distributed/executor/distributed_execution_locks.c +++ b/src/backend/distributed/executor/distributed_execution_locks.c @@ -9,6 +9,7 @@ *------------------------------------------------------------------------- */ #include "distributed/distributed_execution_locks.h" +#include "distributed/executor_util.h" #include "distributed/listutils.h" #include "distributed/coordinator_protocol.h" #include "distributed/metadata_cache.h" @@ -19,6 +20,259 @@ #include "distributed/transaction_management.h" +/* + * AcquireExecutorShardLocksForExecution acquires advisory lock on shard IDs + * to prevent unsafe concurrent modifications of shards. + * + * We prevent concurrent modifications of shards in two cases: + * 1. Any non-commutative writes to a replicated table + * 2. Multi-shard writes that are executed in parallel + * + * The first case ensures we do not apply updates in different orders on + * different replicas (e.g. of a reference table), which could lead the + * replicas to diverge. + * + * The second case prevents deadlocks due to out-of-order execution. + * + * There are two GUCs that can override the default behaviors. + * 'citus.all_modifications_commutative' relaxes locking + * that's done for the purpose of keeping replicas consistent. + * 'citus.enable_deadlock_prevention' relaxes locking done for + * the purpose of avoiding deadlocks between concurrent + * multi-shard commands. + * + * We do not take executor shard locks for utility commands such as + * TRUNCATE because the table locks already prevent concurrent access. + */ +void +AcquireExecutorShardLocksForExecution(RowModifyLevel modLevel, List *taskList) +{ + if (modLevel <= ROW_MODIFY_READONLY && + !SelectForUpdateOnReferenceTable(taskList)) + { + /* + * Executor locks only apply to DML commands and SELECT FOR UPDATE queries + * touching reference tables. + */ + return; + } + + bool requiresParallelExecutionLocks = + !(list_length(taskList) == 1 || ShouldRunTasksSequentially(taskList)); + + bool modifiedTableReplicated = ModifiedTableReplicated(taskList); + if (!modifiedTableReplicated && !requiresParallelExecutionLocks) + { + /* + * When a distributed query on tables with replication + * factor == 1 and command hits only a single shard, we + * rely on Postgres to handle the serialization of the + * concurrent modifications on the workers. + * + * For reference tables, even if their placements are replicated + * ones (e.g., single node), we acquire the distributed execution + * locks to be consistent when new node(s) are added. So, they + * do not return at this point. + */ + return; + } + + /* + * We first assume that all the remaining modifications are going to + * be serialized. So, start with an ExclusiveLock and lower the lock level + * as much as possible. 
+ */ + int lockMode = ExclusiveLock; + + /* + * In addition to honouring commutativity rules, we currently only + * allow a single multi-shard command on a shard at a time. Otherwise, + * concurrent multi-shard commands may take row-level locks on the + * shard placements in a different order and create a distributed + * deadlock. This applies even when writes are commutative and/or + * there is no replication. This can be relaxed via + * EnableDeadlockPrevention. + * + * 1. If citus.all_modifications_commutative is set to true, then all locks + * are acquired as RowExclusiveLock. + * + * 2. If citus.all_modifications_commutative is false, then only the shards + * with more than one replicas are locked with ExclusiveLock. Otherwise, the + * lock is acquired with ShareUpdateExclusiveLock. + * + * ShareUpdateExclusiveLock conflicts with itself such that only one + * multi-shard modification at a time is allowed on a shard. It also conflicts + * with ExclusiveLock, which ensures that updates/deletes/upserts are applied + * in the same order on all placements. It does not conflict with + * RowExclusiveLock, which is normally obtained by single-shard, commutative + * writes. + */ + if (!modifiedTableReplicated && requiresParallelExecutionLocks) + { + /* + * When there is no replication then we only need to prevent + * concurrent multi-shard commands on the same shards. This is + * because concurrent, parallel commands may modify the same + * set of shards, but in different orders. The order of the + * accesses might trigger distributed deadlocks that are not + * possible to happen on non-distributed systems such + * regular Postgres. + * + * As an example, assume that we have two queries: query-1 and query-2. + * Both queries access shard-1 and shard-2. If query-1 first accesses to + * shard-1 then shard-2, and query-2 accesses shard-2 then shard-1, these + * two commands might block each other in case they modify the same rows + * (e.g., cause distributed deadlocks). + * + * In either case, ShareUpdateExclusive has the desired effect, since + * it conflicts with itself and ExclusiveLock (taken by non-commutative + * writes). + * + * However, some users find this too restrictive, so we allow them to + * reduce to a RowExclusiveLock when citus.enable_deadlock_prevention + * is enabled, which lets multi-shard modifications run in parallel as + * long as they all disable the GUC. + */ + lockMode = + EnableDeadlockPrevention ? ShareUpdateExclusiveLock : RowExclusiveLock; + + if (!IsCoordinator()) + { + /* + * We also skip taking a heavy-weight lock when running a multi-shard + * commands from workers, since we currently do not prevent concurrency + * across workers anyway. + */ + lockMode = RowExclusiveLock; + } + } + else if (modifiedTableReplicated) + { + /* + * When we are executing distributed queries on replicated tables, our + * default behaviour is to prevent any concurrency. This is valid + * for when parallel execution is happening or not. + * + * The reason is that we cannot control the order of the placement accesses + * of two distributed queries to the same shards. The order of the accesses + * might cause the replicas of the same shard placements diverge. This is + * not possible to happen on non-distributed systems such regular Postgres. + * + * As an example, assume that we have two queries: query-1 and query-2. + * Both queries only access the placements of shard-1, say p-1 and p-2. 
+ * + * And, assume that these queries are non-commutative, such as: + * query-1: UPDATE table SET b = 1 WHERE key = 1; + * query-2: UPDATE table SET b = 2 WHERE key = 1; + * + * If query-1 accesses to p-1 then p-2, and query-2 accesses + * p-2 then p-1, these two commands would leave the p-1 and p-2 + * diverged (e.g., the values for the column "b" would be different). + * + * The only exception to this rule is the single shard commutative + * modifications, such as INSERTs. In that case, we can allow + * concurrency among such backends, hence lowering the lock level + * to RowExclusiveLock. + */ + if (!requiresParallelExecutionLocks && modLevel < ROW_MODIFY_NONCOMMUTATIVE) + { + lockMode = RowExclusiveLock; + } + } + + if (AllModificationsCommutative) + { + /* + * The mapping is overridden when all_modifications_commutative is set to true. + * In that case, all modifications are treated as commutative, which can be used + * to communicate that the application is only generating commutative + * UPDATE/DELETE/UPSERT commands and exclusive locks are unnecessary. This + * is irrespective of single-shard/multi-shard or replicated tables. + */ + lockMode = RowExclusiveLock; + } + + /* now, iterate on the tasks and acquire the executor locks on the shards */ + List *anchorShardIntervalList = NIL; + List *relationRowLockList = NIL; + List *requiresConsistentSnapshotRelationShardList = NIL; + + Task *task = NULL; + foreach_ptr(task, taskList) + { + ShardInterval *anchorShardInterval = LoadShardInterval(task->anchorShardId); + anchorShardIntervalList = lappend(anchorShardIntervalList, anchorShardInterval); + + /* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */ + AcquireExecutorShardLocksForRelationRowLockList(task->relationRowLockList); + + relationRowLockList = + list_concat(relationRowLockList, + task->relationRowLockList); + + /* + * If the task has a subselect, then we may need to lock the shards from which + * the query selects as well to prevent the subselects from seeing different + * results on different replicas. + */ + if (RequiresConsistentSnapshot(task)) + { + /* + * ExclusiveLock conflicts with all lock types used by modifications + * and therefore prevents other modifications from running + * concurrently. + */ + requiresConsistentSnapshotRelationShardList = + list_concat(requiresConsistentSnapshotRelationShardList, + task->relationShardList); + } + } + + /* + * Acquire the locks in a sorted way to avoid deadlocks due to lock + * ordering across concurrent sessions. + */ + anchorShardIntervalList = + SortList(anchorShardIntervalList, CompareShardIntervalsById); + + /* + * If we are dealing with a partition we are also taking locks on parent table + * to prevent deadlocks on concurrent operations on a partition and its parent. + * + * Note that this function currently does not acquire any remote locks as that + * is necessary to control the concurrency across multiple nodes for replicated + * tables. That is because Citus currently does not allow modifications to + * partitions from any node other than the coordinator. + */ + LockParentShardResourceIfPartition(anchorShardIntervalList, lockMode); + + /* Acquire distribution execution locks on the affected shards */ + SerializeNonCommutativeWrites(anchorShardIntervalList, lockMode); + + if (relationRowLockList != NIL) + { + /* Acquire additional locks for SELECT .. 
FOR UPDATE on reference tables */ + AcquireExecutorShardLocksForRelationRowLockList(relationRowLockList); + } + + + if (requiresConsistentSnapshotRelationShardList != NIL) + { + /* + * If the task has a subselect, then we may need to lock the shards from which + * the query selects as well to prevent the subselects from seeing different + * results on different replicas. + * + * ExclusiveLock conflicts with all lock types used by modifications + * and therefore prevents other modifications from running + * concurrently. + */ + LockRelationShardResources(requiresConsistentSnapshotRelationShardList, + ExclusiveLock); + } +} + + /* * RequiresConsistentSnapshot returns true if the given task need to take * the necessary locks to ensure that a subquery in the modify query @@ -188,3 +442,27 @@ LockPartitionRelations(Oid relationId, LOCKMODE lockMode) LockRelationOid(partitionRelationId, lockMode); } } + + +/* + * LockPartitionsForDistributedPlan ensures commands take locks on all partitions + * of a distributed table that appears in the query. We do this primarily out of + * consistency with PostgreSQL locking. + */ +void +LockPartitionsForDistributedPlan(DistributedPlan *plan) +{ + if (TaskListModifiesDatabase(plan->modLevel, plan->workerJob->taskList)) + { + Oid targetRelationId = plan->targetRelationId; + + LockPartitionsInRelationList(list_make1_oid(targetRelationId), RowExclusiveLock); + } + + /* + * Lock partitions of tables that appear in a SELECT or subquery. In the + * DML case this also includes the target relation, but since we already + * have a stronger lock this doesn't do any harm. + */ + LockPartitionsInRelationList(plan->relationIdList, AccessShareLock); +} diff --git a/src/backend/distributed/executor/executor_util_params.c b/src/backend/distributed/executor/executor_util_params.c new file mode 100644 index 000000000..6b5139bff --- /dev/null +++ b/src/backend/distributed/executor/executor_util_params.c @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * executor_util_tasks.c + * + * Utility functions for dealing with task lists in the executor. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "funcapi.h" +#include "miscadmin.h" + +#include "distributed/executor_util.h" +#include "utils/lsyscache.h" + + +/* + * ExtractParametersForRemoteExecution extracts parameter types and values from + * the given ParamListInfo structure, and fills parameter type and value arrays. + * It changes oid of custom types to InvalidOid so that they are the same in workers + * and coordinators. + */ +void +ExtractParametersForRemoteExecution(ParamListInfo paramListInfo, Oid **parameterTypes, + const char ***parameterValues) +{ + ExtractParametersFromParamList(paramListInfo, parameterTypes, + parameterValues, false); +} + + +/* + * ExtractParametersFromParamList extracts parameter types and values from + * the given ParamListInfo structure, and fills parameter type and value arrays. + * If useOriginalCustomTypeOids is true, it uses the original oids for custom types. 
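A standalone sketch of the parameter mapping this function applies (its body follows below, re-added from the copy removed out of adaptive_executor.c): custom-type OIDs are zeroed so the worker resolves the type locally, unreferenced parameters are shipped as NULL with the text type, and NULL parameters keep their type but carry no value. The struct and constants are simplified stand-ins; FirstNormalObjectId is 16384 and TEXTOID is 25 in PostgreSQL, and the real code produces the value with the type's output function.

typedef struct SketchParam
{
	unsigned int ptype;     /* 0 when the parameter is unreferenced */
	int isnull;
	const char *textValue;  /* output-function result in the real code */
} SketchParam;

#define SKETCH_FIRST_NORMAL_OBJECT_ID 16384u  /* FirstNormalObjectId */
#define SKETCH_TEXTOID 25u                    /* TEXTOID */

static void
SketchMapParamForRemoteExecution(const SketchParam *param,
								 int useOriginalCustomTypeOids,
								 unsigned int *typeOut, const char **valueOut)
{
	*typeOut = param->ptype;

	if (param->ptype >= SKETCH_FIRST_NORMAL_OBJECT_ID && !useOriginalCustomTypeOids)
	{
		/* let the worker infer the custom type from the value */
		*typeOut = 0;
	}

	if (param->ptype == 0)
	{
		/* unreferenced parameter: NULL value, text type to avoid type errors */
		*typeOut = SKETCH_TEXTOID;
		*valueOut = NULL;
		return;
	}

	if (param->isnull)
	{
		/* keep the type, skip the value */
		*valueOut = NULL;
		return;
	}

	*valueOut = param->textValue;
}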
+ */ +void +ExtractParametersFromParamList(ParamListInfo paramListInfo, + Oid **parameterTypes, + const char ***parameterValues, bool + useOriginalCustomTypeOids) +{ + int parameterCount = paramListInfo->numParams; + + *parameterTypes = (Oid *) palloc0(parameterCount * sizeof(Oid)); + *parameterValues = (const char **) palloc0(parameterCount * sizeof(char *)); + + /* get parameter types and values */ + for (int parameterIndex = 0; parameterIndex < parameterCount; parameterIndex++) + { + ParamExternData *parameterData = ¶mListInfo->params[parameterIndex]; + Oid typeOutputFunctionId = InvalidOid; + bool variableLengthType = false; + + /* + * Use 0 for data types where the oid values can be different on + * the coordinator and worker nodes. Therefore, the worker nodes can + * infer the correct oid. + */ + if (parameterData->ptype >= FirstNormalObjectId && !useOriginalCustomTypeOids) + { + (*parameterTypes)[parameterIndex] = 0; + } + else + { + (*parameterTypes)[parameterIndex] = parameterData->ptype; + } + + /* + * If the parameter is not referenced / used (ptype == 0) and + * would otherwise have errored out inside standard_planner()), + * don't pass a value to the remote side, and pass text oid to prevent + * undetermined data type errors on workers. + */ + if (parameterData->ptype == 0) + { + (*parameterValues)[parameterIndex] = NULL; + (*parameterTypes)[parameterIndex] = TEXTOID; + + continue; + } + + /* + * If the parameter is NULL then we preserve its type, but + * don't need to evaluate its value. + */ + if (parameterData->isnull) + { + (*parameterValues)[parameterIndex] = NULL; + + continue; + } + + getTypeOutputInfo(parameterData->ptype, &typeOutputFunctionId, + &variableLengthType); + + (*parameterValues)[parameterIndex] = OidOutputFunctionCall(typeOutputFunctionId, + parameterData->value); + } +} diff --git a/src/backend/distributed/executor/executor_util_tasks.c b/src/backend/distributed/executor/executor_util_tasks.c new file mode 100644 index 000000000..abf721196 --- /dev/null +++ b/src/backend/distributed/executor/executor_util_tasks.c @@ -0,0 +1,297 @@ +/*------------------------------------------------------------------------- + * + * executor_util_tasks.c + * + * Utility functions for dealing with task lists in the executor. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "funcapi.h" +#include "miscadmin.h" + +#include "distributed/executor_util.h" +#include "distributed/listutils.h" +#include "distributed/shardinterval_utils.h" + + +/* + * TaskListModifiesDatabase is a helper function for DistributedExecutionModifiesDatabase and + * DistributedPlanModifiesDatabase. + */ +bool +TaskListModifiesDatabase(RowModifyLevel modLevel, List *taskList) +{ + if (modLevel > ROW_MODIFY_READONLY) + { + return true; + } + + /* + * If we cannot decide by only checking the row modify level, + * we should look closer to the tasks. + */ + if (list_length(taskList) < 1) + { + /* is this ever possible? */ + return false; + } + + Task *firstTask = (Task *) linitial(taskList); + + return !ReadOnlyTask(firstTask->taskType); +} + + +/* + * TaskListRequiresRollback returns true if the distributed + * execution should start a CoordinatedTransaction. In other words, if the + * function returns true, the execution sends BEGIN; to every connection + * involved in the distributed execution. 
+ */ +bool +TaskListRequiresRollback(List *taskList) +{ + int taskCount = list_length(taskList); + + if (taskCount == 0) + { + return false; + } + + Task *task = (Task *) linitial(taskList); + if (task->cannotBeExecutedInTransction) + { + /* vacuum, create index concurrently etc. */ + return false; + } + + bool selectForUpdate = task->relationRowLockList != NIL; + if (selectForUpdate) + { + /* + * Do not check SelectOpensTransactionBlock, always open transaction block + * if SELECT FOR UPDATE is executed inside a distributed transaction. + */ + return IsMultiStatementTransaction(); + } + + if (ReadOnlyTask(task->taskType)) + { + return SelectOpensTransactionBlock && + IsTransactionBlock(); + } + + if (IsMultiStatementTransaction()) + { + return true; + } + + if (list_length(taskList) > 1) + { + return true; + } + + if (list_length(task->taskPlacementList) > 1) + { + /* + * Single DML/DDL tasks with replicated tables (including + * reference and non-reference tables) should require + * BEGIN/COMMIT/ROLLBACK. + */ + return true; + } + + if (task->queryCount > 1) + { + /* + * When there are multiple sequential queries in a task + * we need to run those as a transaction. + */ + return true; + } + + return false; +} + + +/* + * TaskListRequires2PC determines whether the given task list requires 2PC. + */ +bool +TaskListRequires2PC(List *taskList) +{ + if (taskList == NIL) + { + return false; + } + + Task *task = (Task *) linitial(taskList); + if (ReadOnlyTask(task->taskType)) + { + /* we do not trigger 2PC for ReadOnly queries */ + return false; + } + + bool singleTask = list_length(taskList) == 1; + if (singleTask && list_length(task->taskPlacementList) == 1) + { + /* we do not trigger 2PC for modifications that are: + * - single task + * - single placement + */ + return false; + } + + /* + * Otherwise, all modifications are done via 2PC. This includes: + * - Multi-shard commands irrespective of the replication factor + * - Single-shard commands that are targeting more than one replica + */ + return true; +} + + +/* + * TaskListCannotBeExecutedInTransaction returns true if any of the + * tasks in the input cannot be executed in a transaction. These are + * tasks like VACUUM or CREATE INDEX CONCURRENTLY etc. + */ +bool +TaskListCannotBeExecutedInTransaction(List *taskList) +{ + Task *task = NULL; + foreach_ptr(task, taskList) + { + if (task->cannotBeExecutedInTransction) + { + return true; + } + } + + return false; +} + + +/* + * SelectForUpdateOnReferenceTable returns true if the input task + * contains a FOR UPDATE clause that locks any reference tables. + */ +bool +SelectForUpdateOnReferenceTable(List *taskList) +{ + if (list_length(taskList) != 1) + { + /* we currently do not support SELECT FOR UPDATE on multi task queries */ + return false; + } + + Task *task = (Task *) linitial(taskList); + RelationRowLock *relationRowLock = NULL; + foreach_ptr(relationRowLock, task->relationRowLockList) + { + Oid relationId = relationRowLock->relationId; + + if (IsCitusTableType(relationId, REFERENCE_TABLE)) + { + return true; + } + } + + return false; +} + + +/* + * ReadOnlyTask returns true if the input task does a read-only operation + * on the database. 
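TaskListRequires2PC above reduces to a short rule: read-only task lists never use two-phase commit, a single task with a single placement commits normally, and every other modification goes through 2PC. A compact standalone sketch of that rule, with the task-list properties passed in as plain values and illustrative names only:

#include <stdbool.h>

static bool
SketchTaskListRequires2PC(int taskCount, bool firstTaskIsReadOnly,
						  int firstTaskPlacementCount)
{
	if (taskCount == 0 || firstTaskIsReadOnly)
	{
		return false;
	}

	if (taskCount == 1 && firstTaskPlacementCount == 1)
	{
		/* single task, single placement: a plain commit is enough */
		return false;
	}

	/* multi-shard, or a single shard with multiple replicas: use 2PC */
	return true;
}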
+ */ +bool +ReadOnlyTask(TaskType taskType) +{ + switch (taskType) + { + case READ_TASK: + case MAP_OUTPUT_FETCH_TASK: + case MAP_TASK: + case MERGE_TASK: + { + return true; + } + + default: + { + return false; + } + } +} + + +/* + * ModifiedTableReplicated iterates on the task list and returns true + * if any of the tasks' anchor shard is a replicated table. We qualify + * replicated tables as any reference table or any distributed table with + * replication factor > 1. + */ +bool +ModifiedTableReplicated(List *taskList) +{ + Task *task = NULL; + foreach_ptr(task, taskList) + { + int64 shardId = task->anchorShardId; + + if (shardId == INVALID_SHARD_ID) + { + continue; + } + + if (ReferenceTableShardId(shardId)) + { + return true; + } + + Oid relationId = RelationIdForShard(shardId); + if (!SingleReplicatedTable(relationId)) + { + return true; + } + } + + return false; +} + + +/* + * ShouldRunTasksSequentially returns true if each of the individual tasks + * should be executed one by one. Note that this is different than + * MultiShardConnectionType == SEQUENTIAL_CONNECTION case. In that case, + * running the tasks across the nodes in parallel is acceptable and implemented + * in that way. + * + * However, the executions that are qualified here would perform poorly if the + * tasks across the workers are executed in parallel. We currently qualify only + * one class of distributed queries here, multi-row INSERTs. If we do not enforce + * true sequential execution, concurrent multi-row upserts could easily form + * a distributed deadlock when the upserts touch the same rows. + */ +bool +ShouldRunTasksSequentially(List *taskList) +{ + if (list_length(taskList) < 2) + { + /* single task plans are already qualified as sequential by definition */ + return false; + } + + /* all the tasks are the same, so we only look one */ + Task *initialTask = (Task *) linitial(taskList); + if (initialTask->rowValuesLists != NIL) + { + /* found a multi-row INSERT */ + return true; + } + + return false; +} diff --git a/src/backend/distributed/executor/executor_util_tuples.c b/src/backend/distributed/executor/executor_util_tuples.c new file mode 100644 index 000000000..c5fde9f90 --- /dev/null +++ b/src/backend/distributed/executor/executor_util_tuples.c @@ -0,0 +1,129 @@ +/*------------------------------------------------------------------------- + * + * executor_util_tuples.c + * + * Utility functions for handling tuples during remote execution. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "funcapi.h" +#include "miscadmin.h" + +#include "distributed/executor_util.h" +#include "utils/lsyscache.h" + + +/* + * TupleDescGetAttBinaryInMetadata - Build an AttInMetadata structure based on + * the supplied TupleDesc. AttInMetadata can be used in conjunction with + * fmStringInfos containing binary encoded types to produce a properly formed + * tuple. + * + * NOTE: This function is a copy of the PG function TupleDescGetAttInMetadata, + * except that it uses getTypeBinaryInputInfo instead of getTypeInputInfo. 
+ */ +AttInMetadata * +TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc) +{ + int natts = tupdesc->natts; + int i; + Oid atttypeid; + Oid attinfuncid; + + AttInMetadata *attinmeta = (AttInMetadata *) palloc(sizeof(AttInMetadata)); + + /* "Bless" the tupledesc so that we can make rowtype datums with it */ + attinmeta->tupdesc = BlessTupleDesc(tupdesc); + + /* + * Gather info needed later to call the "in" function for each attribute + */ + FmgrInfo *attinfuncinfo = (FmgrInfo *) palloc0(natts * sizeof(FmgrInfo)); + Oid *attioparams = (Oid *) palloc0(natts * sizeof(Oid)); + int32 *atttypmods = (int32 *) palloc0(natts * sizeof(int32)); + + for (i = 0; i < natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + + /* Ignore dropped attributes */ + if (!att->attisdropped) + { + atttypeid = att->atttypid; + getTypeBinaryInputInfo(atttypeid, &attinfuncid, &attioparams[i]); + fmgr_info(attinfuncid, &attinfuncinfo[i]); + atttypmods[i] = att->atttypmod; + } + } + attinmeta->attinfuncs = attinfuncinfo; + attinmeta->attioparams = attioparams; + attinmeta->atttypmods = atttypmods; + + return attinmeta; +} + + +/* + * BuildTupleFromBytes - build a HeapTuple given user data in binary form. + * values is an array of StringInfos, one for each attribute of the return + * tuple. A NULL StringInfo pointer indicates we want to create a NULL field. + * + * NOTE: This function is a copy of the PG function BuildTupleFromCStrings, + * except that it uses ReceiveFunctionCall instead of InputFunctionCall. + */ +HeapTuple +BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values) +{ + TupleDesc tupdesc = attinmeta->tupdesc; + int natts = tupdesc->natts; + int i; + + Datum *dvalues = (Datum *) palloc(natts * sizeof(Datum)); + bool *nulls = (bool *) palloc(natts * sizeof(bool)); + + /* + * Call the "in" function for each non-dropped attribute, even for nulls, + * to support domains. + */ + for (i = 0; i < natts; i++) + { + if (!TupleDescAttr(tupdesc, i)->attisdropped) + { + /* Non-dropped attributes */ + dvalues[i] = ReceiveFunctionCall(&attinmeta->attinfuncs[i], + values[i], + attinmeta->attioparams[i], + attinmeta->atttypmods[i]); + if (values[i] != NULL) + { + nulls[i] = false; + } + else + { + nulls[i] = true; + } + } + else + { + /* Handle dropped attributes by setting to NULL */ + dvalues[i] = (Datum) 0; + nulls[i] = true; + } + } + + /* + * Form a tuple + */ + HeapTuple tuple = heap_form_tuple(tupdesc, dvalues, nulls); + + /* + * Release locally palloc'd space. XXX would probably be good to pfree + * values of pass-by-reference datums, as well. 
+ */ + pfree(dvalues); + pfree(nulls); + + return tuple; +} diff --git a/src/backend/distributed/executor/insert_select_executor.c b/src/backend/distributed/executor/insert_select_executor.c index 9549846d5..a69ae0f22 100644 --- a/src/backend/distributed/executor/insert_select_executor.c +++ b/src/backend/distributed/executor/insert_select_executor.c @@ -409,11 +409,13 @@ ExecutePlanIntoColocatedIntermediateResults(Oid targetRelationId, columnNameList); /* set up a DestReceiver that copies into the intermediate table */ + const bool publishableData = true; CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId, columnNameList, partitionColumnIndex, executorState, - intermediateResultIdPrefix); + intermediateResultIdPrefix, + publishableData); ExecutePlanIntoDestReceiver(selectPlan, paramListInfo, (DestReceiver *) copyDest); @@ -443,10 +445,12 @@ ExecutePlanIntoRelation(Oid targetRelationId, List *insertTargetList, columnNameList); /* set up a DestReceiver that copies into the distributed table */ + const bool publishableData = true; CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId, columnNameList, partitionColumnIndex, - executorState, NULL); + executorState, NULL, + publishableData); ExecutePlanIntoDestReceiver(selectPlan, paramListInfo, (DestReceiver *) copyDest); diff --git a/src/backend/distributed/executor/local_executor.c b/src/backend/distributed/executor/local_executor.c index ffd063ca0..d946e15c8 100644 --- a/src/backend/distributed/executor/local_executor.c +++ b/src/backend/distributed/executor/local_executor.c @@ -90,6 +90,7 @@ #include "distributed/local_executor.h" #include "distributed/local_plan_cache.h" #include "distributed/coordinator_protocol.h" +#include "distributed/executor_util.h" #include "distributed/metadata_cache.h" #include "distributed/multi_executor.h" #include "distributed/multi_server_executor.h" diff --git a/src/backend/distributed/executor/multi_executor.c b/src/backend/distributed/executor/multi_executor.c index a0063adc8..04cb39a58 100644 --- a/src/backend/distributed/executor/multi_executor.c +++ b/src/backend/distributed/executor/multi_executor.c @@ -802,6 +802,11 @@ GetObjectTypeString(ObjectType objType) return "function"; } + case OBJECT_PUBLICATION: + { + return "publication"; + } + case OBJECT_SCHEMA: { return "schema"; diff --git a/src/backend/distributed/metadata/dependency.c b/src/backend/distributed/metadata/dependency.c index a67c8fed0..a58e57be7 100644 --- a/src/backend/distributed/metadata/dependency.c +++ b/src/backend/distributed/metadata/dependency.c @@ -132,6 +132,7 @@ typedef struct ViewDependencyNode static List * GetRelationSequenceDependencyList(Oid relationId); static List * GetRelationFunctionDependencyList(Oid relationId); static List * GetRelationTriggerFunctionDependencyList(Oid relationId); +static List * GetPublicationRelationsDependencyList(Oid relationId); static List * GetRelationStatsSchemaDependencyList(Oid relationId); static List * GetRelationIndicesDependencyList(Oid relationId); static DependencyDefinition * CreateObjectAddressDependencyDef(Oid classId, Oid objectId); @@ -722,6 +723,11 @@ SupportedDependencyByCitus(const ObjectAddress *address) return true; } + case OCLASS_PUBLICATION: + { + return true; + } + case OCLASS_TSCONFIG: { return true; @@ -1656,6 +1662,36 @@ ExpandCitusSupportedTypes(ObjectAddressCollector *collector, ObjectAddress targe List *ruleRefDepList = GetViewRuleReferenceDependencyList(relationId); result = list_concat(result, ruleRefDepList); 
} + + break; + } + + case PublicationRelationId: + { + Oid publicationId = target.objectId; + + /* + * Publications do not depend directly on relations, because dropping + * the relation will only remove it from the publications. However, + * we add a dependency to ensure the relation is created first when + * adding a node. + */ + List *relationDependencyList = + GetPublicationRelationsDependencyList(publicationId); + result = list_concat(result, relationDependencyList); + + /* + * As of PostgreSQL 15, the same applies to schemas. + */ +#if PG_VERSION_NUM >= PG_VERSION_15 + List *schemaIdList = + GetPublicationSchemas(publicationId); + List *schemaDependencyList = + CreateObjectAddressDependencyDefList(NamespaceRelationId, schemaIdList); + result = list_concat(result, schemaDependencyList); +#endif + + break; } default: @@ -1834,7 +1870,7 @@ static List * GetRelationSequenceDependencyList(Oid relationId) { List *seqInfoList = NIL; - GetDependentSequencesWithRelation(relationId, &seqInfoList, 0); + GetDependentSequencesWithRelation(relationId, &seqInfoList, 0, DEPENDENCY_AUTO); List *seqIdList = NIL; SequenceInfo *seqInfo = NULL; @@ -1923,6 +1959,33 @@ GetRelationTriggerFunctionDependencyList(Oid relationId) } +/* + * GetPublicationRelationsDependencyList creates a list of ObjectAddressDependencies for + * a publication on the Citus relations it contains. This helps make sure we distribute + * Citus tables before local tables. + */ +static List * +GetPublicationRelationsDependencyList(Oid publicationId) +{ + List *allRelationIds = GetPublicationRelations(publicationId, PUBLICATION_PART_ROOT); + List *citusRelationIds = NIL; + + Oid relationId = InvalidOid; + + foreach_oid(relationId, allRelationIds) + { + if (!IsCitusTable(relationId)) + { + continue; + } + + citusRelationIds = lappend_oid(citusRelationIds, relationId); + } + + return CreateObjectAddressDependencyDefList(RelationRelationId, citusRelationIds); +} + + /* * GetTypeConstraintDependencyDefinition creates a list of constraint dependency * definitions for a given type diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index 8fd4c5de6..1e73eef6b 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -311,7 +311,7 @@ static void InvalidateDistTableCache(void); static void InvalidateDistObjectCache(void); static bool InitializeTableCacheEntry(int64 shardId, bool missingOk); static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel, - CitusTableType tableType); + uint32 colocationId, CitusTableType tableType); static bool RefreshTableCacheEntryIfInvalid(ShardIdCacheEntry *shardEntry, bool missingOk); @@ -450,7 +450,36 @@ bool IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tableType) { return IsCitusTableTypeInternal(tableEntry->partitionMethod, - tableEntry->replicationModel, tableType); + tableEntry->replicationModel, + tableEntry->colocationId, tableType); +} + + +/* + * HasDistributionKey returs true if given Citus table doesn't have a + * distribution key. 
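GetPublicationRelationsDependencyList above follows a simple filter-and-wrap shape: list every relation in the publication, keep only the Citus tables, and emit one dependency per survivor. A standalone sketch of the filtering step over plain arrays; SketchOid and the IsCitusTableFn callback are illustrative stand-ins for the Oid list and the IsCitusTable catalog check.

#include <stdbool.h>
#include <stddef.h>

typedef unsigned int SketchOid;
typedef bool (*IsCitusTableFn)(SketchOid relationId);

/* copy only the relations the predicate accepts; returns how many were kept */
static size_t
SketchFilterCitusRelations(const SketchOid *allRelationIds, size_t relationCount,
						   IsCitusTableFn isCitusTable, SketchOid *citusRelationIds)
{
	size_t citusCount = 0;

	for (size_t i = 0; i < relationCount; i++)
	{
		if (isCitusTable(allRelationIds[i]))
		{
			citusRelationIds[citusCount++] = allRelationIds[i];
		}
	}

	return citusCount;
}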
+ */ +bool +HasDistributionKey(Oid relationId) +{ + CitusTableCacheEntry *tableEntry = LookupCitusTableCacheEntry(relationId); + if (tableEntry == NULL) + { + ereport(ERROR, (errmsg("relation with oid %u is not a Citus table", relationId))); + } + + return HasDistributionKeyCacheEntry(tableEntry); +} + + +/* + * HasDistributionKey returs true if given cache entry identifies a Citus + * table that doesn't have a distribution key. + */ +bool +HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry) +{ + return tableEntry->partitionMethod != DISTRIBUTE_BY_NONE; } @@ -460,7 +489,7 @@ IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tabl */ static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel, - CitusTableType tableType) + uint32 colocationId, CitusTableType tableType) { switch (tableType) { @@ -501,12 +530,8 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel, case CITUS_LOCAL_TABLE: { return partitionMethod == DISTRIBUTE_BY_NONE && - replicationModel != REPLICATION_MODEL_2PC; - } - - case CITUS_TABLE_WITH_NO_DIST_KEY: - { - return partitionMethod == DISTRIBUTE_BY_NONE; + replicationModel != REPLICATION_MODEL_2PC && + colocationId == INVALID_COLOCATION_ID; } case ANY_CITUS_TABLE_TYPE: @@ -529,33 +554,21 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel, char * GetTableTypeName(Oid tableId) { - bool regularTable = false; - char partitionMethod = ' '; - char replicationModel = ' '; - if (IsCitusTable(tableId)) - { - CitusTableCacheEntry *referencingCacheEntry = GetCitusTableCacheEntry(tableId); - partitionMethod = referencingCacheEntry->partitionMethod; - replicationModel = referencingCacheEntry->replicationModel; - } - else - { - regularTable = true; - } - - if (regularTable) + if (!IsCitusTable(tableId)) { return "regular table"; } - else if (partitionMethod == 'h') + + CitusTableCacheEntry *tableCacheEntry = GetCitusTableCacheEntry(tableId); + if (IsCitusTableTypeCacheEntry(tableCacheEntry, HASH_DISTRIBUTED)) { return "distributed table"; } - else if (partitionMethod == 'n' && replicationModel == 't') + else if (IsCitusTableTypeCacheEntry(tableCacheEntry, REFERENCE_TABLE)) { return "reference table"; } - else if (partitionMethod == 'n' && replicationModel != 't') + else if (IsCitusTableTypeCacheEntry(tableCacheEntry, CITUS_LOCAL_TABLE)) { return "citus local table"; } @@ -577,6 +590,18 @@ IsCitusTable(Oid relationId) } +/* + * IsCitusTableRangeVar returns whether the table named in the given + * rangeVar is a Citus table. + */ +bool +IsCitusTableRangeVar(RangeVar *rangeVar, LOCKMODE lockMode, bool missingOK) +{ + Oid relationId = RangeVarGetRelid(rangeVar, lockMode, missingOK); + return IsCitusTable(relationId); +} + + /* * IsCitusTableViaCatalog returns whether the given relation is a * distributed table or not. @@ -765,14 +790,28 @@ PgDistPartitionTupleViaCatalog(Oid relationId) /* - * IsCitusLocalTableByDistParams returns true if given partitionMethod and - * replicationModel would identify a citus local table. + * IsReferenceTableByDistParams returns true if given partitionMethod and + * replicationModel would identify a reference table. 
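HasDistributionKeyCacheEntry returns true when the cached partition method is anything other than DISTRIBUTE_BY_NONE, that is, when the table does have a distribution key; callers therefore negate it where they previously tested CITUS_TABLE_WITH_NO_DIST_KEY. A minimal sketch of that usage follows; the helper name is invented for illustration and is not part of this patch.

static const char *
DescribeCitusTableType(Oid relationId)
{
	if (!IsCitusTable(relationId))
	{
		return "regular table";
	}

	CitusTableCacheEntry *tableEntry = GetCitusTableCacheEntry(relationId);

	if (HasDistributionKeyCacheEntry(tableEntry))
	{
		return "distributed table";
	}
	else if (IsCitusTableTypeCacheEntry(tableEntry, REFERENCE_TABLE))
	{
		return "reference table";
	}
	else if (IsCitusTableTypeCacheEntry(tableEntry, CITUS_LOCAL_TABLE))
	{
		return "citus local table";
	}

	return "other citus table";
}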
*/ bool -IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel) +IsReferenceTableByDistParams(char partitionMethod, char replicationModel) { return partitionMethod == DISTRIBUTE_BY_NONE && - replicationModel != REPLICATION_MODEL_2PC; + replicationModel == REPLICATION_MODEL_2PC; +} + + +/* + * IsCitusLocalTableByDistParams returns true if given partitionMethod, + * replicationModel and colocationId would identify a citus local table. + */ +bool +IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel, + uint32 colocationId) +{ + return partitionMethod == DISTRIBUTE_BY_NONE && + replicationModel != REPLICATION_MODEL_2PC && + colocationId == INVALID_COLOCATION_ID; } @@ -4837,11 +4876,14 @@ CitusTableTypeIdList(CitusTableType citusTableType) Datum partMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1]; Datum replicationModelDatum = datumArray[Anum_pg_dist_partition_repmodel - 1]; + Datum colocationIdDatum = datumArray[Anum_pg_dist_partition_colocationid - 1]; Oid partitionMethod = DatumGetChar(partMethodDatum); Oid replicationModel = DatumGetChar(replicationModelDatum); + uint32 colocationId = DatumGetUInt32(colocationIdDatum); - if (IsCitusTableTypeInternal(partitionMethod, replicationModel, citusTableType)) + if (IsCitusTableTypeInternal(partitionMethod, replicationModel, colocationId, + citusTableType)) { Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1]; diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c index 6a5840f78..e3310c5c8 100644 --- a/src/backend/distributed/metadata/metadata_sync.c +++ b/src/backend/distributed/metadata/metadata_sync.c @@ -90,6 +90,7 @@ /* managed via a GUC */ char *EnableManualMetadataChangesForUser = ""; +int MetadataSyncTransMode = METADATA_SYNC_TRANSACTIONAL; static void EnsureObjectMetadataIsSane(int distributionArgumentIndex, @@ -100,6 +101,7 @@ static bool HasMetadataWorkers(void); static void CreateShellTableOnWorkers(Oid relationId); static void CreateTableMetadataOnWorkers(Oid relationId); static void CreateDependingViewsOnWorkers(Oid relationId); +static void AddTableToPublications(Oid relationId); static NodeMetadataSyncResult SyncNodeMetadataToNodesOptional(void); static bool ShouldSyncTableMetadataInternal(bool hashDistributed, bool citusTableWithNoDistKey); @@ -192,8 +194,20 @@ start_metadata_sync_to_node(PG_FUNCTION_ARGS) EnsureCoordinator(); char *nodeNameString = text_to_cstring(nodeName); + WorkerNode *workerNode = ModifiableWorkerNode(nodeNameString, nodePort); - ActivateNode(nodeNameString, nodePort); + /* + * Create MetadataSyncContext which is used throughout nodes' activation. + * It contains activated nodes, bare connections if the mode is nontransactional, + * and a memory context for allocation. + */ + bool collectCommands = false; + bool nodesAddedInSameTransaction = false; + MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(workerNode), + collectCommands, + nodesAddedInSameTransaction); + + ActivateNodeList(context); TransactionModifiedNodeMetadata = true; PG_RETURN_VOID(); @@ -213,96 +227,33 @@ start_metadata_sync_to_all_nodes(PG_FUNCTION_ARGS) EnsureSuperUser(); EnsureCoordinator(); - List *workerNodes = ActivePrimaryNonCoordinatorNodeList(RowShareLock); + List *nodeList = ActivePrimaryNonCoordinatorNodeList(RowShareLock); - ActivateNodeList(workerNodes); + /* + * Create MetadataSyncContext which is used throughout nodes' activation. 
+ * It contains activated nodes, bare connections if the mode is nontransactional, + * and a memory context for allocation. + */ + bool collectCommands = false; + bool nodesAddedInSameTransaction = false; + MetadataSyncContext *context = CreateMetadataSyncContext(nodeList, + collectCommands, + nodesAddedInSameTransaction); + + ActivateNodeList(context); TransactionModifiedNodeMetadata = true; PG_RETURN_BOOL(true); } -/* - * SyncNodeMetadataToNode is the internal API for - * start_metadata_sync_to_node(). - */ -void -SyncNodeMetadataToNode(const char *nodeNameString, int32 nodePort) -{ - char *escapedNodeName = quote_literal_cstr(nodeNameString); - - CheckCitusVersion(ERROR); - EnsureCoordinator(); - EnsureModificationsCanRun(); - - EnsureSequentialModeMetadataOperations(); - - LockRelationOid(DistNodeRelationId(), ExclusiveLock); - - WorkerNode *workerNode = FindWorkerNode(nodeNameString, nodePort); - if (workerNode == NULL) - { - ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("you cannot sync metadata to a non-existent node"), - errhint("First, add the node with SELECT citus_add_node" - "(%s,%d)", escapedNodeName, nodePort))); - } - - if (!workerNode->isActive) - { - ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("you cannot sync metadata to an inactive node"), - errhint("First, activate the node with " - "SELECT citus_activate_node(%s,%d)", - escapedNodeName, nodePort))); - } - - if (NodeIsCoordinator(workerNode)) - { - ereport(NOTICE, (errmsg("%s:%d is the coordinator and already contains " - "metadata, skipping syncing the metadata", - nodeNameString, nodePort))); - return; - } - - UseCoordinatedTransaction(); - - /* - * One would normally expect to set hasmetadata first, and then metadata sync. - * However, at this point we do the order reverse. - * We first set metadatasynced, and then hasmetadata; since setting columns for - * nodes with metadatasynced==false could cause errors. - * (See ErrorIfAnyMetadataNodeOutOfSync) - * We can safely do that because we are in a coordinated transaction and the changes - * are only visible to our own transaction. - * If anything goes wrong, we are going to rollback all the changes. - */ - workerNode = SetWorkerColumn(workerNode, Anum_pg_dist_node_metadatasynced, - BoolGetDatum(true)); - workerNode = SetWorkerColumn(workerNode, Anum_pg_dist_node_hasmetadata, BoolGetDatum( - true)); - - if (!NodeIsPrimary(workerNode)) - { - /* - * If this is a secondary node we can't actually sync metadata to it; we assume - * the primary node is receiving metadata. - */ - return; - } - - /* fail if metadata synchronization doesn't succeed */ - bool raiseInterrupts = true; - SyncNodeMetadataSnapshotToNode(workerNode, raiseInterrupts); -} - - /* * SyncCitusTableMetadata syncs citus table metadata to worker nodes with metadata. * Our definition of metadata includes the shell table and its inter relations with * other shell tables, corresponding pg_dist_object, pg_dist_partiton, pg_dist_shard * and pg_dist_shard placement entries. This function also propagates the views that - * depend on the given relation, to the metadata workers. + * depend on the given relation, to the metadata workers, and adds the relation to + * the appropriate publications. 
*/ void SyncCitusTableMetadata(Oid relationId) @@ -319,6 +270,7 @@ SyncCitusTableMetadata(Oid relationId) } CreateDependingViewsOnWorkers(relationId); + AddTableToPublications(relationId); } @@ -364,6 +316,49 @@ CreateDependingViewsOnWorkers(Oid relationId) } +/* + * AddTableToPublications adds the table to a publication on workers with metadata. + */ +static void +AddTableToPublications(Oid relationId) +{ + List *publicationIds = GetRelationPublications(relationId); + if (publicationIds == NIL) + { + return; + } + + Oid publicationId = InvalidOid; + + SendCommandToWorkersWithMetadata(DISABLE_DDL_PROPAGATION); + + foreach_oid(publicationId, publicationIds) + { + ObjectAddress *publicationAddress = palloc0(sizeof(ObjectAddress)); + ObjectAddressSet(*publicationAddress, PublicationRelationId, publicationId); + List *addresses = list_make1(publicationAddress); + + if (!ShouldPropagateAnyObject(addresses)) + { + /* skip non-distributed publications */ + continue; + } + + /* ensure schemas exist */ + EnsureAllObjectDependenciesExistOnAllNodes(addresses); + + bool isAdd = true; + char *alterPublicationCommand = + GetAlterPublicationTableDDLCommand(publicationId, relationId, isAdd); + + /* send ALTER PUBLICATION .. ADD to workers with metadata */ + SendCommandToWorkersWithMetadata(alterPublicationCommand); + } + + SendCommandToWorkersWithMetadata(ENABLE_DDL_PROPAGATION); +} + + /* * EnsureSequentialModeMetadataOperations makes sure that the current transaction is * already in sequential mode, or can still safely be put in sequential mode, @@ -535,7 +530,7 @@ ShouldSyncTableMetadata(Oid relationId) bool hashDistributed = IsCitusTableTypeCacheEntry(tableEntry, HASH_DISTRIBUTED); bool citusTableWithNoDistKey = - IsCitusTableTypeCacheEntry(tableEntry, CITUS_TABLE_WITH_NO_DIST_KEY); + !HasDistributionKeyCacheEntry(tableEntry); return ShouldSyncTableMetadataInternal(hashDistributed, citusTableWithNoDistKey); } @@ -566,6 +561,25 @@ ShouldSyncTableMetadataViaCatalog(Oid relationId) } +/* + * FetchRelationIdFromPgPartitionHeapTuple returns relation id from given heap tuple. + */ +Oid +FetchRelationIdFromPgPartitionHeapTuple(HeapTuple heapTuple, TupleDesc tupleDesc) +{ + Assert(heapTuple->t_tableOid == DistPartitionRelationId()); + + bool isNullArray[Natts_pg_dist_partition]; + Datum datumArray[Natts_pg_dist_partition]; + heap_deform_tuple(heapTuple, tupleDesc, datumArray, isNullArray); + + Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1]; + Oid relationId = DatumGetObjectId(relationIdDatum); + + return relationId; +} + + /* * ShouldSyncTableMetadataInternal decides whether we should sync the metadata for a table * based on whether it is a hash distributed table, or a citus table with no distribution @@ -669,11 +683,12 @@ DropMetadataSnapshotOnNode(WorkerNode *workerNode) * Detach partitions, break dependencies between sequences and table then * remove shell tables first. 
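FetchRelationIdFromPgPartitionHeapTuple above exists so the sync paths can walk pg_dist_partition without hand-deforming tuples at every call site. A condensed sketch of the scan pattern it supports, which the Send* functions later in this patch follow as well; locking and error handling are simplified.

Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);
TupleDesc tupleDesc = RelationGetDescr(pgDistPartition);
SysScanDesc scanDesc = systable_beginscan(pgDistPartition, InvalidOid, false,
										  NULL, 0, NULL);

HeapTuple heapTuple = NULL;
while (HeapTupleIsValid(heapTuple = systable_getnext(scanDesc)))
{
	Oid relationId = FetchRelationIdFromPgPartitionHeapTuple(heapTuple, tupleDesc);

	if (!ShouldSyncTableMetadata(relationId))
	{
		continue;
	}

	/* build and send the per-table metadata commands here */
}

systable_endscan(scanDesc);
table_close(pgDistPartition, AccessShareLock);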
*/ + bool singleTransaction = true; List *dropMetadataCommandList = DetachPartitionCommandList(); dropMetadataCommandList = lappend(dropMetadataCommandList, BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND); dropMetadataCommandList = lappend(dropMetadataCommandList, - REMOVE_ALL_SHELL_TABLES_COMMAND); + WorkerDropAllShellTablesCommand(singleTransaction)); dropMetadataCommandList = list_concat(dropMetadataCommandList, NodeMetadataDropCommands()); dropMetadataCommandList = lappend(dropMetadataCommandList, @@ -723,114 +738,6 @@ NodeMetadataCreateCommands(void) } -/* - * DistributedObjectMetadataSyncCommandList returns the necessary commands to create - * pg_dist_object entries on the new node. - */ -List * -DistributedObjectMetadataSyncCommandList(void) -{ - HeapTuple pgDistObjectTup = NULL; - Relation pgDistObjectRel = table_open(DistObjectRelationId(), AccessShareLock); - Relation pgDistObjectIndexRel = index_open(DistObjectPrimaryKeyIndexId(), - AccessShareLock); - TupleDesc pgDistObjectDesc = RelationGetDescr(pgDistObjectRel); - - List *objectAddressList = NIL; - List *distArgumentIndexList = NIL; - List *colocationIdList = NIL; - List *forceDelegationList = NIL; - - /* It is not strictly necessary to read the tuples in order. - * However, it is useful to get consistent behavior, both for regression - * tests and also in production systems. - */ - SysScanDesc pgDistObjectScan = systable_beginscan_ordered(pgDistObjectRel, - pgDistObjectIndexRel, NULL, - 0, NULL); - while (HeapTupleIsValid(pgDistObjectTup = systable_getnext_ordered(pgDistObjectScan, - ForwardScanDirection))) - { - Form_pg_dist_object pg_dist_object = (Form_pg_dist_object) GETSTRUCT( - pgDistObjectTup); - - ObjectAddress *address = palloc(sizeof(ObjectAddress)); - - ObjectAddressSubSet(*address, pg_dist_object->classid, pg_dist_object->objid, - pg_dist_object->objsubid); - - bool distributionArgumentIndexIsNull = false; - Datum distributionArgumentIndexDatum = - heap_getattr(pgDistObjectTup, - Anum_pg_dist_object_distribution_argument_index, - pgDistObjectDesc, - &distributionArgumentIndexIsNull); - int32 distributionArgumentIndex = DatumGetInt32(distributionArgumentIndexDatum); - - bool colocationIdIsNull = false; - Datum colocationIdDatum = - heap_getattr(pgDistObjectTup, - Anum_pg_dist_object_colocationid, - pgDistObjectDesc, - &colocationIdIsNull); - int32 colocationId = DatumGetInt32(colocationIdDatum); - - bool forceDelegationIsNull = false; - Datum forceDelegationDatum = - heap_getattr(pgDistObjectTup, - Anum_pg_dist_object_force_delegation, - pgDistObjectDesc, - &forceDelegationIsNull); - bool forceDelegation = DatumGetBool(forceDelegationDatum); - - objectAddressList = lappend(objectAddressList, address); - - if (distributionArgumentIndexIsNull) - { - distArgumentIndexList = lappend_int(distArgumentIndexList, - INVALID_DISTRIBUTION_ARGUMENT_INDEX); - } - else - { - distArgumentIndexList = lappend_int(distArgumentIndexList, - distributionArgumentIndex); - } - - if (colocationIdIsNull) - { - colocationIdList = lappend_int(colocationIdList, - INVALID_COLOCATION_ID); - } - else - { - colocationIdList = lappend_int(colocationIdList, colocationId); - } - - if (forceDelegationIsNull) - { - forceDelegationList = lappend_int(forceDelegationList, NO_FORCE_PUSHDOWN); - } - else - { - forceDelegationList = lappend_int(forceDelegationList, forceDelegation); - } - } - - systable_endscan_ordered(pgDistObjectScan); - index_close(pgDistObjectIndexRel, AccessShareLock); - relation_close(pgDistObjectRel, NoLock); - - char 
*workerMetadataUpdateCommand = - MarkObjectsDistributedCreateCommand(objectAddressList, - distArgumentIndexList, - colocationIdList, - forceDelegationList); - List *commandList = list_make1(workerMetadataUpdateCommand); - - return commandList; -} - - /* * CitusTableMetadataCreateCommandList returns the set of commands necessary to * create the given distributed table metadata on a worker. @@ -943,6 +850,35 @@ NodeListInsertCommand(List *workerNodeList) } +/* + * NodeListIdempotentInsertCommand generates an idempotent multi-row INSERT command that + * can be executed to insert the nodes that are in workerNodeList to pg_dist_node table. + * It would insert new nodes or replace current nodes with new nodes if nodename-nodeport + * pairs already exist. + */ +char * +NodeListIdempotentInsertCommand(List *workerNodeList) +{ + StringInfo nodeInsertIdempotentCommand = makeStringInfo(); + char *nodeInsertStr = NodeListInsertCommand(workerNodeList); + appendStringInfoString(nodeInsertIdempotentCommand, nodeInsertStr); + char *onConflictStr = " ON CONFLICT ON CONSTRAINT pg_dist_node_nodename_nodeport_key " + "DO UPDATE SET nodeid = EXCLUDED.nodeid, " + "groupid = EXCLUDED.groupid, " + "nodename = EXCLUDED.nodename, " + "nodeport = EXCLUDED.nodeport, " + "noderack = EXCLUDED.noderack, " + "hasmetadata = EXCLUDED.hasmetadata, " + "isactive = EXCLUDED.isactive, " + "noderole = EXCLUDED.noderole, " + "nodecluster = EXCLUDED.nodecluster ," + "metadatasynced = EXCLUDED.metadatasynced, " + "shouldhaveshards = EXCLUDED.shouldhaveshards"; + appendStringInfoString(nodeInsertIdempotentCommand, onConflictStr); + return nodeInsertIdempotentCommand->data; +} + + /* * MarkObjectsDistributedCreateCommand generates a command that can be executed to * insert or update the provided objects into pg_dist_object on a worker node. @@ -1158,7 +1094,7 @@ DistributionCreateCommand(CitusTableCacheEntry *cacheEntry) char replicationModel = cacheEntry->replicationModel; StringInfo tablePartitionKeyNameString = makeStringInfo(); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { appendStringInfo(tablePartitionKeyNameString, "NULL"); } @@ -1586,10 +1522,13 @@ GetAttributeTypeOid(Oid relationId, AttrNumber attnum) * For both cases, we use the intermediate AttrDefault object from pg_depend. * If attnum is specified, we only return the sequences related to that * attribute of the relationId. + * See DependencyType for the possible values of depType. + * We use DEPENDENCY_INTERNAL for sequences created by identity column. + * DEPENDENCY_AUTO for regular sequences. 
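A short sketch of the two call patterns this comment distinguishes; relationId is assumed to be a valid table OID, and attnum 0 requests sequences for all attributes, as in the existing callers.

/* regular sequences referenced from column defaults (e.g. serial columns) */
List *autoSeqInfoList = NIL;
GetDependentSequencesWithRelation(relationId, &autoSeqInfoList, 0, DEPENDENCY_AUTO);

/* sequences backing GENERATED ... AS IDENTITY columns */
List *identitySeqInfoList = NIL;
GetDependentSequencesWithRelation(relationId, &identitySeqInfoList, 0,
								  DEPENDENCY_INTERNAL);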
*/ void GetDependentSequencesWithRelation(Oid relationId, List **seqInfoList, - AttrNumber attnum) + AttrNumber attnum, char depType) { Assert(*seqInfoList == NIL); @@ -1626,7 +1565,7 @@ GetDependentSequencesWithRelation(Oid relationId, List **seqInfoList, if (deprec->classid == AttrDefaultRelationId && deprec->objsubid == 0 && deprec->refobjsubid != 0 && - deprec->deptype == DEPENDENCY_AUTO) + deprec->deptype == depType) { /* * We are going to generate corresponding SequenceInfo @@ -1635,8 +1574,7 @@ GetDependentSequencesWithRelation(Oid relationId, List **seqInfoList, attrdefResult = lappend_oid(attrdefResult, deprec->objid); attrdefAttnumResult = lappend_int(attrdefAttnumResult, deprec->refobjsubid); } - else if ((deprec->deptype == DEPENDENCY_AUTO || deprec->deptype == - DEPENDENCY_INTERNAL) && + else if (deprec->deptype == depType && deprec->refobjsubid != 0 && deprec->classid == RelationRelationId && get_rel_relkind(deprec->objid) == RELKIND_SEQUENCE) @@ -1883,6 +1821,53 @@ SequenceDependencyCommandList(Oid relationId) } +/* + * IdentitySequenceDependencyCommandList generate a command to execute + * a UDF (WORKER_ADJUST_IDENTITY_COLUMN_SEQ_RANGES) on workers to modify the identity + * columns min/max values to produce unique values on workers. + */ +List * +IdentitySequenceDependencyCommandList(Oid targetRelationId) +{ + List *commandList = NIL; + + Relation relation = relation_open(targetRelationId, AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(relation); + + bool tableHasIdentityColumn = false; + for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts; + attributeIndex++) + { + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex); + + if (attributeForm->attidentity) + { + tableHasIdentityColumn = true; + break; + } + } + + relation_close(relation, NoLock); + + if (tableHasIdentityColumn) + { + StringInfo stringInfo = makeStringInfo(); + char *tableName = generate_qualified_relation_name(targetRelationId); + + appendStringInfo(stringInfo, + WORKER_ADJUST_IDENTITY_COLUMN_SEQ_RANGES, + quote_literal_cstr(tableName)); + + + commandList = lappend(commandList, + makeTableDDLCommandString( + stringInfo->data)); + } + + return commandList; +} + + /* * CreateSequenceDependencyCommand generates a query string for calling * worker_record_sequence_dependency on the worker to recreate a sequence->table @@ -2605,8 +2590,7 @@ CreateShellTableOnWorkers(Oid relationId) List *commandList = list_make1(DISABLE_DDL_PROPAGATION); IncludeSequenceDefaults includeSequenceDefaults = WORKER_NEXTVAL_SEQUENCE_DEFAULTS; - IncludeIdentities includeIdentityDefaults = - INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS; + IncludeIdentities includeIdentityDefaults = INCLUDE_IDENTITY; bool creatingShellTableOnRemoteNode = true; List *tableDDLCommands = GetFullTableCreationCommands(relationId, @@ -3296,7 +3280,6 @@ EnsureCoordinatorInitiatedOperation(void) * by the coordinator. */ if (!(IsCitusInternalBackend() || IsRebalancerInternalBackend()) || - !MyBackendIsInDisributedTransaction() || GetLocalGroupId() == COORDINATOR_GROUP_ID) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), @@ -3954,47 +3937,493 @@ ColocationGroupDeleteCommand(uint32 colocationId) /* - * ColocationGroupCreateCommandList returns the full list of commands for syncing - * pg_dist_colocation. + * SetMetadataSyncNodesFromNodeList sets list of nodes that needs to be metadata + * synced among given node list into metadataSyncContext. 
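IdentitySequenceDependencyCommandList returns TableDDLCommands, so a caller can simply concatenate them onto the DDL list generated for the table. A hypothetical call site, assuming the argument order used in CreateShellTableOnWorkers above; the real call site is not shown in this hunk.

IncludeSequenceDefaults includeSequenceDefaults = WORKER_NEXTVAL_SEQUENCE_DEFAULTS;
IncludeIdentities includeIdentityDefaults = INCLUDE_IDENTITY;
bool creatingShellTableOnRemoteNode = true;

List *tableDDLCommands = GetFullTableCreationCommands(relationId,
													  includeSequenceDefaults,
													  includeIdentityDefaults,
													  creatingShellTableOnRemoteNode);
tableDDLCommands = list_concat(tableDDLCommands,
							   IdentitySequenceDependencyCommandList(relationId));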
*/ -List * -ColocationGroupCreateCommandList(void) +void +SetMetadataSyncNodesFromNodeList(MetadataSyncContext *context, List *nodeList) { - bool hasColocations = false; + /* sync is disabled, then no nodes to sync */ + if (!EnableMetadataSync) + { + return; + } - StringInfo colocationGroupCreateCommand = makeStringInfo(); - appendStringInfo(colocationGroupCreateCommand, - "WITH colocation_group_data (colocationid, shardcount, " - "replicationfactor, distributioncolumntype, " - "distributioncolumncollationname, " - "distributioncolumncollationschema) AS (VALUES "); + List *activatedWorkerNodeList = NIL; - Relation pgDistColocation = table_open(DistColocationRelationId(), AccessShareLock); - Relation colocationIdIndexRel = index_open(DistColocationIndexId(), AccessShareLock); + WorkerNode *node = NULL; + foreach_ptr(node, nodeList) + { + if (NodeIsPrimary(node)) + { + /* warn if we have coordinator in nodelist */ + if (NodeIsCoordinator(node)) + { + ereport(NOTICE, (errmsg("%s:%d is the coordinator and already contains " + "metadata, skipping syncing the metadata", + node->workerName, node->workerPort))); + continue; + } + + activatedWorkerNodeList = lappend(activatedWorkerNodeList, node); + } + } + + context->activatedWorkerNodeList = activatedWorkerNodeList; +} + + +/* + * EstablishAndSetMetadataSyncBareConnections establishes and sets + * connections used throughout nontransactional metadata sync. + */ +void +EstablishAndSetMetadataSyncBareConnections(MetadataSyncContext *context) +{ + Assert(MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL); + + int connectionFlags = REQUIRE_METADATA_CONNECTION; + + /* establish bare connections to activated worker nodes */ + List *bareConnectionList = NIL; + WorkerNode *node = NULL; + foreach_ptr(node, context->activatedWorkerNodeList) + { + MultiConnection *connection = GetNodeUserDatabaseConnection(connectionFlags, + node->workerName, + node->workerPort, + CurrentUserName(), + NULL); + + Assert(connection != NULL); + ForceConnectionCloseAtTransactionEnd(connection); + bareConnectionList = lappend(bareConnectionList, connection); + } + + context->activatedWorkerBareConnections = bareConnectionList; +} + + +/* + * CreateMetadataSyncContext creates a context which contains worker connections + * and a MemoryContext to be used throughout the metadata sync. + * + * If we collect commands, connections will not be established as caller's intent + * is to collect sync commands. + * + * If the nodes are newly added before activation, we would not try to unset + * metadatasynced in separate transaction during nontransactional metadatasync. 
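A usage sketch mirroring the rewritten start_metadata_sync_to_node() earlier in this patch; workerNode is assumed to be a primary worker obtained through ModifiableWorkerNode(), and the citus.metadata_sync_mode GUC decides whether the context ends up transactional or nontransactional.

bool collectCommands = false;
bool nodesAddedInSameTransaction = false;
MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(workerNode),
														 collectCommands,
														 nodesAddedInSameTransaction);
ActivateNodeList(context);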
+ */ +MetadataSyncContext * +CreateMetadataSyncContext(List *nodeList, bool collectCommands, + bool nodesAddedInSameTransaction) +{ + /* should be alive during local transaction during the sync */ + MemoryContext context = AllocSetContextCreate(TopTransactionContext, + "metadata_sync_context", + ALLOCSET_DEFAULT_SIZES); + + MetadataSyncContext *metadataSyncContext = (MetadataSyncContext *) palloc0( + sizeof(MetadataSyncContext)); + + metadataSyncContext->context = context; + metadataSyncContext->transactionMode = MetadataSyncTransMode; + metadataSyncContext->collectCommands = collectCommands; + metadataSyncContext->collectedCommands = NIL; + metadataSyncContext->nodesAddedInSameTransaction = nodesAddedInSameTransaction; + + /* filter the nodes that needs to be activated from given node list */ + SetMetadataSyncNodesFromNodeList(metadataSyncContext, nodeList); /* - * It is not strictly necessary to read the tuples in order. - * However, it is useful to get consistent behavior, both for regression - * tests and also in production systems. + * establish connections only for nontransactional mode to prevent connection + * open-close for each command */ - SysScanDesc scanDescriptor = - systable_beginscan_ordered(pgDistColocation, colocationIdIndexRel, - NULL, 0, NULL); - - HeapTuple colocationTuple = systable_getnext_ordered(scanDescriptor, - ForwardScanDirection); - - while (HeapTupleIsValid(colocationTuple)) + if (!collectCommands && MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL) { - if (hasColocations) + EstablishAndSetMetadataSyncBareConnections(metadataSyncContext); + } + + /* use 2PC coordinated transactions if we operate in transactional mode */ + if (MetadataSyncTransMode == METADATA_SYNC_TRANSACTIONAL) + { + Use2PCForCoordinatedTransaction(); + } + + return metadataSyncContext; +} + + +/* + * ResetMetadataSyncMemoryContext resets memory context inside metadataSyncContext, if + * we are not collecting commands. + */ +void +ResetMetadataSyncMemoryContext(MetadataSyncContext *context) +{ + if (!MetadataSyncCollectsCommands(context)) + { + MemoryContextReset(context->context); + } +} + + +/* + * MetadataSyncCollectsCommands returns whether context is used for collecting + * commands instead of sending them to workers. + */ +bool +MetadataSyncCollectsCommands(MetadataSyncContext *context) +{ + return context->collectCommands; +} + + +/* + * SendOrCollectCommandListToActivatedNodes sends the commands to the activated nodes with + * bare connections inside metadatacontext or via coordinated connections. + * Note that when context only collects commands, we add commands into the context + * without sending the commands. + */ +void +SendOrCollectCommandListToActivatedNodes(MetadataSyncContext *context, List *commands) +{ + /* do nothing if no commands */ + if (commands == NIL) + { + return; + } + + /* + * do not send any command to workers if we collect commands. + * Collect commands into metadataSyncContext's collected command + * list. 
+ */ + if (MetadataSyncCollectsCommands(context)) + { + context->collectedCommands = list_concat(context->collectedCommands, commands); + return; + } + + /* send commands to new workers, the current user should be a superuser */ + Assert(superuser()); + + if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL) + { + List *workerNodes = context->activatedWorkerNodeList; + SendMetadataCommandListToWorkerListInCoordinatedTransaction(workerNodes, + CurrentUserName(), + commands); + } + else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + List *workerConnections = context->activatedWorkerBareConnections; + SendCommandListToWorkerListWithBareConnections(workerConnections, commands); + } + else + { + pg_unreachable(); + } +} + + +/* + * SendOrCollectCommandListToMetadataNodes sends the commands to the metadata nodes with + * bare connections inside metadatacontext or via coordinated connections. + * Note that when context only collects commands, we add commands into the context + * without sending the commands. + */ +void +SendOrCollectCommandListToMetadataNodes(MetadataSyncContext *context, List *commands) +{ + /* + * do not send any command to workers if we collcet commands. + * Collect commands into metadataSyncContext's collected command + * list. + */ + if (MetadataSyncCollectsCommands(context)) + { + context->collectedCommands = list_concat(context->collectedCommands, commands); + return; + } + + /* send commands to new workers, the current user should be a superuser */ + Assert(superuser()); + + if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL) + { + List *metadataNodes = TargetWorkerSetNodeList(NON_COORDINATOR_METADATA_NODES, + RowShareLock); + SendMetadataCommandListToWorkerListInCoordinatedTransaction(metadataNodes, + CurrentUserName(), + commands); + } + else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + SendBareCommandListToMetadataWorkers(commands); + } + else + { + pg_unreachable(); + } +} + + +/* + * SendOrCollectCommandListToSingleNode sends the commands to the specific worker + * indexed by nodeIdx with bare connection inside metadatacontext or via coordinated + * connection. Note that when context only collects commands, we add commands into + * the context without sending the commands. + */ +void +SendOrCollectCommandListToSingleNode(MetadataSyncContext *context, List *commands, + int nodeIdx) +{ + /* + * Do not send any command to workers if we collect commands. + * Collect commands into metadataSyncContext's collected command + * list. 
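When collectCommands is true, the SendOrCollect* helpers above short-circuit into the context's command list instead of opening connections, which makes it possible to inspect what would be sent. A minimal sketch, assuming nodeList holds primary worker nodes; the helper name is invented for illustration.

static List *
CollectMetadataSyncCommands(List *nodeList)
{
	bool collectCommands = true;
	bool nodesAddedInSameTransaction = false;
	MetadataSyncContext *context = CreateMetadataSyncContext(nodeList,
															 collectCommands,
															 nodesAddedInSameTransaction);

	SendOrCollectCommandListToActivatedNodes(context,
											 list_make1(DISABLE_DDL_PROPAGATION));
	SendOrCollectCommandListToActivatedNodes(context,
											 list_make1(ENABLE_DDL_PROPAGATION));

	/* nothing was sent; the commands are queued on the context */
	return context->collectedCommands;
}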
+ */ + if (MetadataSyncCollectsCommands(context)) + { + context->collectedCommands = list_concat(context->collectedCommands, commands); + return; + } + + /* send commands to new workers, the current user should be a superuser */ + Assert(superuser()); + + if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL) + { + List *workerNodes = context->activatedWorkerNodeList; + Assert(nodeIdx < list_length(workerNodes)); + + WorkerNode *node = list_nth(workerNodes, nodeIdx); + SendMetadataCommandListToWorkerListInCoordinatedTransaction(list_make1(node), + CurrentUserName(), + commands); + } + else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + List *workerConnections = context->activatedWorkerBareConnections; + Assert(nodeIdx < list_length(workerConnections)); + + MultiConnection *workerConnection = list_nth(workerConnections, nodeIdx); + List *connectionList = list_make1(workerConnection); + SendCommandListToWorkerListWithBareConnections(connectionList, commands); + } + else + { + pg_unreachable(); + } +} + + +/* + * WorkerDropAllShellTablesCommand returns command required to drop shell tables + * from workers. When singleTransaction is false, we create transaction per shell + * table. Otherwise, we drop all shell tables within single transaction. + */ +char * +WorkerDropAllShellTablesCommand(bool singleTransaction) +{ + char *singleTransactionString = (singleTransaction) ? "true" : "false"; + StringInfo removeAllShellTablesCommand = makeStringInfo(); + appendStringInfo(removeAllShellTablesCommand, WORKER_DROP_ALL_SHELL_TABLES, + singleTransactionString); + return removeAllShellTablesCommand->data; +} + + +/* + * PropagateNodeWideObjectsCommandList is called during node activation to + * propagate any object that should be propagated for every node. These are + * generally not linked to any distributed object but change system wide behaviour. + */ +static List * +PropagateNodeWideObjectsCommandList(void) +{ + /* collect all commands */ + List *ddlCommands = NIL; + + if (EnableAlterRoleSetPropagation) + { + /* + * Get commands for database and postgres wide settings. Since these settings are not + * linked to any role that can be distributed we need to distribute them seperately + */ + List *alterRoleSetCommands = GenerateAlterRoleSetCommandForRole(InvalidOid); + ddlCommands = list_concat(ddlCommands, alterRoleSetCommands); + } + + return ddlCommands; +} + + +/* + * SyncDistributedObjects sync the distributed objects to the nodes in metadataSyncContext + * with transactional or nontransactional mode according to transactionMode inside + * metadataSyncContext. + * + * Transactions should be ordered like below: + * - Nodewide objects (only roles for now), + * - Deletion of sequence and shell tables and metadata entries + * - All dependencies (e.g., types, schemas, sequences) and all shell distributed + * table and their pg_dist_xx metadata entries + * - Inter relation between those shell tables + * + * Note that we do not create the distributed dependencies on the coordinator + * since all the dependencies should be present in the coordinator already. 
+ */ +void +SyncDistributedObjects(MetadataSyncContext *context) +{ + if (context->activatedWorkerNodeList == NIL) + { + return; + } + + EnsureSequentialModeMetadataOperations(); + + Assert(ShouldPropagate()); + + /* Send systemwide objects, only roles for now */ + SendNodeWideObjectsSyncCommands(context); + + /* + * Break dependencies between sequences-shell tables, then remove shell tables, + * and metadata tables respectively. + * We should delete shell tables before metadata entries as we look inside + * pg_dist_partition to figure out shell tables. + */ + SendShellTableDeletionCommands(context); + SendMetadataDeletionCommands(context); + + /* + * Commands to insert pg_dist_colocation entries. + * Replicating dist objects and their metadata depends on this step. + */ + SendColocationMetadataCommands(context); + + /* + * Replicate all objects of the pg_dist_object to the remote node and + * create metadata entries for Citus tables (pg_dist_shard, pg_dist_shard_placement, + * pg_dist_partition, pg_dist_object). + */ + SendDependencyCreationCommands(context); + SendDistTableMetadataCommands(context); + SendDistObjectCommands(context); + + /* + * After creating each table, handle the inter table relationship between + * those tables. + */ + SendInterTableRelationshipCommands(context); +} + + +/* + * SendNodeWideObjectsSyncCommands sends systemwide objects to workers with + * transactional or nontransactional mode according to transactionMode inside + * metadataSyncContext. + */ +void +SendNodeWideObjectsSyncCommands(MetadataSyncContext *context) +{ + /* propagate node wide objects. It includes only roles for now. */ + List *commandList = PropagateNodeWideObjectsCommandList(); + + if (commandList == NIL) + { + return; + } + + commandList = lcons(DISABLE_DDL_PROPAGATION, commandList); + commandList = lappend(commandList, ENABLE_DDL_PROPAGATION); + SendOrCollectCommandListToActivatedNodes(context, commandList); +} + + +/* + * SendShellTableDeletionCommands sends sequence, and shell table deletion + * commands to workers with transactional or nontransactional mode according to + * transactionMode inside metadataSyncContext. + */ +void +SendShellTableDeletionCommands(MetadataSyncContext *context) +{ + /* break all sequence deps for citus tables and remove all shell tables */ + char *breakSeqDepsCommand = BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND; + SendOrCollectCommandListToActivatedNodes(context, list_make1(breakSeqDepsCommand)); + + /* remove shell tables */ + bool singleTransaction = (context->transactionMode == METADATA_SYNC_TRANSACTIONAL); + char *dropShellTablesCommand = WorkerDropAllShellTablesCommand(singleTransaction); + SendOrCollectCommandListToActivatedNodes(context, list_make1(dropShellTablesCommand)); +} + + +/* + * SendMetadataDeletionCommands sends metadata entry deletion commands to workers + * with transactional or nontransactional mode according to transactionMode inside + * metadataSyncContext. 
+ */ +void +SendMetadataDeletionCommands(MetadataSyncContext *context) +{ + /* remove pg_dist_partition entries */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_PARTITIONS)); + + /* remove pg_dist_shard entries */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_SHARDS)); + + /* remove pg_dist_placement entries */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_PLACEMENTS)); + + /* remove pg_dist_object entries */ + SendOrCollectCommandListToActivatedNodes(context, + list_make1(DELETE_ALL_DISTRIBUTED_OBJECTS)); + + /* remove pg_dist_colocation entries */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_COLOCATION)); +} + + +/* + * SendColocationMetadataCommands sends colocation metadata with transactional or + * nontransactional mode according to transactionMode inside metadataSyncContext. + */ +void +SendColocationMetadataCommands(MetadataSyncContext *context) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + + Relation relation = table_open(DistColocationRelationId(), AccessShareLock); + SysScanDesc scanDesc = systable_beginscan(relation, InvalidOid, false, NULL, + scanKeyCount, scanKey); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + HeapTuple nextTuple = NULL; + while (true) + { + ResetMetadataSyncMemoryContext(context); + + nextTuple = systable_getnext(scanDesc); + if (!HeapTupleIsValid(nextTuple)) { - appendStringInfo(colocationGroupCreateCommand, ", "); + break; } - hasColocations = true; + StringInfo colocationGroupCreateCommand = makeStringInfo(); + appendStringInfo(colocationGroupCreateCommand, + "WITH colocation_group_data (colocationid, shardcount, " + "replicationfactor, distributioncolumntype, " + "distributioncolumncollationname, " + "distributioncolumncollationschema) AS (VALUES "); Form_pg_dist_colocation colocationForm = - (Form_pg_dist_colocation) GETSTRUCT(colocationTuple); + (Form_pg_dist_colocation) GETSTRUCT(nextTuple); appendStringInfo(colocationGroupCreateCommand, "(%d, %d, %d, %s, ", @@ -4012,20 +4441,17 @@ ColocationGroupCreateCommandList(void) { Datum collationIdDatum = ObjectIdGetDatum(distributionColumCollation); HeapTuple collationTuple = SearchSysCache1(COLLOID, collationIdDatum); - if (HeapTupleIsValid(collationTuple)) { Form_pg_collation collationform = (Form_pg_collation) GETSTRUCT(collationTuple); char *collationName = NameStr(collationform->collname); - char *collationSchemaName = get_namespace_name( - collationform->collnamespace); - + char *collationSchemaName = + get_namespace_name(collationform->collnamespace); appendStringInfo(colocationGroupCreateCommand, "%s, %s)", quote_literal_cstr(collationName), quote_literal_cstr(collationSchemaName)); - ReleaseSysCache(collationTuple); } else @@ -4040,26 +4466,290 @@ ColocationGroupCreateCommandList(void) "NULL, NULL)"); } - colocationTuple = systable_getnext_ordered(scanDescriptor, ForwardScanDirection); + appendStringInfo(colocationGroupCreateCommand, + ") SELECT pg_catalog.citus_internal_add_colocation_metadata(" + "colocationid, shardcount, replicationfactor, " + "distributioncolumntype, coalesce(c.oid, 0)) " + "FROM colocation_group_data d LEFT JOIN pg_collation c " + "ON (d.distributioncolumncollationname = c.collname " + "AND d.distributioncolumncollationschema::regnamespace" + " = c.collnamespace)"); + + List *commandList = list_make1(colocationGroupCreateCommand->data); + SendOrCollectCommandListToActivatedNodes(context, commandList); } + 
MemoryContextSwitchTo(oldContext); - systable_endscan_ordered(scanDescriptor); - index_close(colocationIdIndexRel, AccessShareLock); - table_close(pgDistColocation, AccessShareLock); - - if (!hasColocations) - { - return NIL; - } - - appendStringInfo(colocationGroupCreateCommand, - ") SELECT pg_catalog.citus_internal_add_colocation_metadata(" - "colocationid, shardcount, replicationfactor, " - "distributioncolumntype, coalesce(c.oid, 0)) " - "FROM colocation_group_data d LEFT JOIN pg_collation c " - "ON (d.distributioncolumncollationname = c.collname " - "AND d.distributioncolumncollationschema::regnamespace" - " = c.collnamespace)"); - - return list_make1(colocationGroupCreateCommand->data); + systable_endscan(scanDesc); + table_close(relation, AccessShareLock); +} + + +/* + * SendDependencyCreationCommands sends dependency creation commands to workers + * with transactional or nontransactional mode according to transactionMode + * inside metadataSyncContext. + */ +void +SendDependencyCreationCommands(MetadataSyncContext *context) +{ + /* disable ddl propagation */ + SendOrCollectCommandListToActivatedNodes(context, + list_make1(DISABLE_DDL_PROPAGATION)); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + + /* collect all dependencies in creation order and get their ddl commands */ + List *dependencies = GetDistributedObjectAddressList(); + + /* + * Depending on changes in the environment, such as the enable_metadata_sync guc + * there might be objects in the distributed object address list that should currently + * not be propagated by citus as they are 'not supported'. + */ + dependencies = FilterObjectAddressListByPredicate(dependencies, + &SupportedDependencyByCitus); + + dependencies = OrderObjectAddressListInDependencyOrder(dependencies); + + /* + * We need to create a subcontext as we reset the context after each dependency + * creation but we want to preserve all dependency objects at metadataSyncContext. + */ + MemoryContext commandsContext = AllocSetContextCreate(context->context, + "dependency commands context", + ALLOCSET_DEFAULT_SIZES); + MemoryContextSwitchTo(commandsContext); + ObjectAddress *dependency = NULL; + foreach_ptr(dependency, dependencies) + { + if (!MetadataSyncCollectsCommands(context)) + { + MemoryContextReset(commandsContext); + } + + if (IsAnyObjectAddressOwnedByExtension(list_make1(dependency), NULL)) + { + /* + * We expect extension-owned objects to be created as a result + * of the extension being created. + */ + continue; + } + + /* dependency creation commands */ + List *ddlCommands = GetAllDependencyCreateDDLCommands(list_make1(dependency)); + SendOrCollectCommandListToActivatedNodes(context, ddlCommands); + } + MemoryContextSwitchTo(oldContext); + + if (!MetadataSyncCollectsCommands(context)) + { + MemoryContextDelete(commandsContext); + } + ResetMetadataSyncMemoryContext(context); + + /* enable ddl propagation */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(ENABLE_DDL_PROPAGATION)); +} + + +/* + * SendDistTableMetadataCommands sends commands related to pg_dist_shard and, + * pg_dist_shard_placement entries to workers with transactional or nontransactional + * mode according to transactionMode inside metadataSyncContext. 
+ */ +void +SendDistTableMetadataCommands(MetadataSyncContext *context) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + + Relation relation = table_open(DistPartitionRelationId(), AccessShareLock); + TupleDesc tupleDesc = RelationGetDescr(relation); + + SysScanDesc scanDesc = systable_beginscan(relation, InvalidOid, false, NULL, + scanKeyCount, scanKey); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + HeapTuple nextTuple = NULL; + while (true) + { + ResetMetadataSyncMemoryContext(context); + + nextTuple = systable_getnext(scanDesc); + if (!HeapTupleIsValid(nextTuple)) + { + break; + } + + /* + * Create Citus table metadata commands (pg_dist_shard, pg_dist_shard_placement, + * pg_dist_partition). Only Citus tables have shard metadata. + */ + Oid relationId = FetchRelationIdFromPgPartitionHeapTuple(nextTuple, tupleDesc); + if (!ShouldSyncTableMetadata(relationId)) + { + continue; + } + + List *commandList = CitusTableMetadataCreateCommandList(relationId); + SendOrCollectCommandListToActivatedNodes(context, commandList); + } + MemoryContextSwitchTo(oldContext); + + systable_endscan(scanDesc); + table_close(relation, AccessShareLock); +} + + +/* + * SendDistObjectCommands sends commands related to pg_dist_object entries to + * workers with transactional or nontransactional mode according to transactionMode + * inside metadataSyncContext. + */ +void +SendDistObjectCommands(MetadataSyncContext *context) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + + Relation relation = table_open(DistObjectRelationId(), AccessShareLock); + TupleDesc tupleDesc = RelationGetDescr(relation); + + SysScanDesc scanDesc = systable_beginscan(relation, InvalidOid, false, NULL, + scanKeyCount, scanKey); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + HeapTuple nextTuple = NULL; + while (true) + { + ResetMetadataSyncMemoryContext(context); + + nextTuple = systable_getnext(scanDesc); + if (!HeapTupleIsValid(nextTuple)) + { + break; + } + + Form_pg_dist_object pg_dist_object = (Form_pg_dist_object) GETSTRUCT(nextTuple); + + ObjectAddress *address = palloc(sizeof(ObjectAddress)); + + ObjectAddressSubSet(*address, pg_dist_object->classid, pg_dist_object->objid, + pg_dist_object->objsubid); + + bool distributionArgumentIndexIsNull = false; + Datum distributionArgumentIndexDatum = + heap_getattr(nextTuple, + Anum_pg_dist_object_distribution_argument_index, + tupleDesc, + &distributionArgumentIndexIsNull); + int32 distributionArgumentIndex = DatumGetInt32(distributionArgumentIndexDatum); + + bool colocationIdIsNull = false; + Datum colocationIdDatum = + heap_getattr(nextTuple, + Anum_pg_dist_object_colocationid, + tupleDesc, + &colocationIdIsNull); + int32 colocationId = DatumGetInt32(colocationIdDatum); + + bool forceDelegationIsNull = false; + Datum forceDelegationDatum = + heap_getattr(nextTuple, + Anum_pg_dist_object_force_delegation, + tupleDesc, + &forceDelegationIsNull); + bool forceDelegation = DatumGetBool(forceDelegationDatum); + + if (distributionArgumentIndexIsNull) + { + distributionArgumentIndex = INVALID_DISTRIBUTION_ARGUMENT_INDEX; + } + + if (colocationIdIsNull) + { + colocationId = INVALID_COLOCATION_ID; + } + + if (forceDelegationIsNull) + { + forceDelegation = NO_FORCE_PUSHDOWN; + } + + char *workerMetadataUpdateCommand = + MarkObjectsDistributedCreateCommand(list_make1(address), + list_make1_int(distributionArgumentIndex), + list_make1_int(colocationId), + list_make1_int(forceDelegation)); + SendOrCollectCommandListToActivatedNodes(context, 
+ list_make1(workerMetadataUpdateCommand)); + } + MemoryContextSwitchTo(oldContext); + + systable_endscan(scanDesc); + relation_close(relation, NoLock); +} + + +/* + * SendInterTableRelationshipCommands sends inter-table relationship commands + * (e.g. constraints, attach partitions) to workers with transactional or + * nontransactional mode per inter table relationship according to transactionMode + * inside metadataSyncContext. + */ +void +SendInterTableRelationshipCommands(MetadataSyncContext *context) +{ + /* disable ddl propagation */ + SendOrCollectCommandListToActivatedNodes(context, + list_make1(DISABLE_DDL_PROPAGATION)); + + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + + Relation relation = table_open(DistPartitionRelationId(), AccessShareLock); + TupleDesc tupleDesc = RelationGetDescr(relation); + + SysScanDesc scanDesc = systable_beginscan(relation, InvalidOid, false, NULL, + scanKeyCount, scanKey); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + HeapTuple nextTuple = NULL; + while (true) + { + ResetMetadataSyncMemoryContext(context); + + nextTuple = systable_getnext(scanDesc); + if (!HeapTupleIsValid(nextTuple)) + { + break; + } + + Oid relationId = FetchRelationIdFromPgPartitionHeapTuple(nextTuple, tupleDesc); + if (!ShouldSyncTableMetadata(relationId)) + { + continue; + } + + /* + * Skip foreign key and partition creation when the Citus table is + * owned by an extension. + */ + if (IsTableOwnedByExtension(relationId)) + { + continue; + } + + List *commandList = InterTableRelationshipOfRelationCommandList(relationId); + SendOrCollectCommandListToActivatedNodes(context, commandList); + } + MemoryContextSwitchTo(oldContext); + + systable_endscan(scanDesc); + table_close(relation, AccessShareLock); + + /* enable ddl propagation */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(ENABLE_DDL_PROPAGATION)); } diff --git a/src/backend/distributed/metadata/metadata_utility.c b/src/backend/distributed/metadata/metadata_utility.c index dba509681..b25da1ebd 100644 --- a/src/backend/distributed/metadata/metadata_utility.c +++ b/src/backend/distributed/metadata/metadata_utility.c @@ -985,7 +985,7 @@ AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval) appendStringInfo(selectQuery, "SELECT " UINT64_FORMAT " AS shard_id, ", shardId); appendStringInfo(selectQuery, "%s AS shard_name, ", quotedShardName); - appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName); + appendStringInfo(selectQuery, PG_TOTAL_RELATION_SIZE_FUNCTION, quotedShardName); } @@ -1670,6 +1670,48 @@ TupleToGroupShardPlacement(TupleDesc tupleDescriptor, HeapTuple heapTuple) } +/* + * LookupTaskPlacementHostAndPort sets the nodename and nodeport for the given task placement + * with a lookup. + */ +void +LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName, + int *nodePort) +{ + if (IsDummyPlacement(taskPlacement)) + { + /* + * If we create a dummy placement for the local node, it is possible + * that the entry doesn't exist in pg_dist_node, hence a lookup will fail. + * In that case we want to use the dummy placements values. + */ + *nodeName = taskPlacement->nodeName; + *nodePort = taskPlacement->nodePort; + } + else + { + /* + * We want to lookup the node information again since it is possible that + * there were changes in pg_dist_node and we will get those invalidations + * in LookupNodeForGroup. 
+ */ + WorkerNode *workerNode = LookupNodeForGroup(taskPlacement->groupId); + *nodeName = workerNode->workerName; + *nodePort = workerNode->workerPort; + } +} + + +/* + * IsDummyPlacement returns true if the given placement is a dummy placement. + */ +bool +IsDummyPlacement(ShardPlacement *taskPlacement) +{ + return taskPlacement->nodeId == LOCAL_NODE_ID; +} + + /* * InsertShardRow opens the shard system catalog, and inserts a new row with the * given values into that system catalog. Note that we allow the user to pass in diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index f6639f8d2..2639b79f0 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -48,6 +48,7 @@ #include "distributed/version_compat.h" #include "distributed/worker_manager.h" #include "distributed/worker_transaction.h" +#include "executor/spi.h" #include "lib/stringinfo.h" #include "postmaster/postmaster.h" #include "storage/bufmgr.h" @@ -90,24 +91,21 @@ static void RemoveNodeFromCluster(char *nodeName, int32 nodePort); static void ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode); static bool PlacementHasActivePlacementOnAnotherGroup(GroupShardPlacement *sourcePlacement); -static int AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata - *nodeMetadata, bool *nodeAlreadyExists); -static WorkerNode * SetNodeState(char *nodeName, int32 nodePort, bool isActive); +static int AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata *nodeMetadata, + bool *nodeAlreadyExists, bool localOnly); +static int AddNodeMetadataViaMetadataContext(char *nodeName, int32 nodePort, + NodeMetadata *nodeMetadata, + bool *nodeAlreadyExists); static HeapTuple GetNodeTuple(const char *nodeName, int32 nodePort); +static HeapTuple GetNodeByNodeId(int32 nodeId); static int32 GetNextGroupId(void); static int GetNextNodeId(void); static void InsertPlaceholderCoordinatorRecord(void); static void InsertNodeRow(int nodeid, char *nodename, int32 nodeport, NodeMetadata *nodeMetadata); static void DeleteNodeRow(char *nodename, int32 nodeport); -static void SyncDistributedObjectsToNodeList(List *workerNodeList); -static void UpdateLocalGroupIdOnNode(WorkerNode *workerNode); -static void SyncPgDistTableMetadataToNodeList(List *nodeList); -static List * InterTableRelationshipCommandList(); static void BlockDistributedQueriesOnMetadataNodes(void); static WorkerNode * TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple); -static List * PropagateNodeWideObjectsCommandList(); -static WorkerNode * ModifiableWorkerNode(const char *nodeName, int32 nodePort); static bool NodeIsLocal(WorkerNode *worker); static void SetLockTimeoutLocally(int32 lock_cooldown); static void UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort); @@ -122,6 +120,19 @@ static void ErrorIfCoordinatorMetadataSetFalse(WorkerNode *workerNode, Datum val static WorkerNode * SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards); static int FindCoordinatorNodeId(void); static WorkerNode * FindNodeAnyClusterByNodeId(uint32 nodeId); +static void ErrorIfAnyNodeNotExist(List *nodeList); +static void UpdateLocalGroupIdsViaMetadataContext(MetadataSyncContext *context); +static void SendDeletionCommandsForReplicatedTablePlacements( + MetadataSyncContext *context); +static void SyncNodeMetadata(MetadataSyncContext *context); +static void SetNodeStateViaMetadataContext(MetadataSyncContext *context, + WorkerNode 
*workerNode, + Datum value); +static void MarkNodesNotSyncedInLoopBackConnection(MetadataSyncContext *context, + pid_t parentSessionPid); +static void EnsureParentSessionHasExclusiveLockOnPgDistNode(pid_t parentSessionPid); +static void SetNodeMetadata(MetadataSyncContext *context, bool localOnly); +static void EnsureTransactionalMetadataSyncMode(void); /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(citus_set_coordinator_host); @@ -146,6 +157,7 @@ PG_FUNCTION_INFO_V1(citus_nodename_for_nodeid); PG_FUNCTION_INFO_V1(citus_nodeport_for_nodeid); PG_FUNCTION_INFO_V1(citus_coordinator_nodeid); PG_FUNCTION_INFO_V1(citus_is_coordinator); +PG_FUNCTION_INFO_V1(citus_internal_mark_node_not_synced); /* @@ -188,16 +200,26 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS) Name nodeClusterName = PG_GETARG_NAME(3); nodeMetadata.nodeCluster = NameStr(*nodeClusterName); + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (nodeMetadata.nodeRole == SecondaryNodeRoleId()) + { + EnsureTransactionalMetadataSyncMode(); + } + bool isCoordinatorInMetadata = false; WorkerNode *coordinatorNode = PrimaryNodeForGroup(COORDINATOR_GROUP_ID, &isCoordinatorInMetadata); if (!isCoordinatorInMetadata) { bool nodeAlreadyExists = false; + bool localOnly = false; /* add the coordinator to pg_dist_node if it was not already added */ AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, - &nodeAlreadyExists); + &nodeAlreadyExists, localOnly); /* we just checked */ Assert(!nodeAlreadyExists); @@ -222,6 +244,21 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS) } +/* + * EnsureTransactionalMetadataSyncMode ensures metadata sync mode is transactional. + */ +static void +EnsureTransactionalMetadataSyncMode(void) +{ + if (MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + ereport(ERROR, (errmsg("this operation cannot be completed in nontransactional " + "metadata sync mode"), + errhint("SET citus.metadata_sync_mode to 'transactional'"))); + } +} + + /* * citus_add_node function adds a new node to the cluster and returns its id. It also * replicates all reference tables to the new node. @@ -231,6 +268,9 @@ citus_add_node(PG_FUNCTION_ARGS) { CheckCitusVersion(ERROR); + EnsureSuperUser(); + EnsureCoordinator(); + text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); char *nodeNameString = text_to_cstring(nodeName); @@ -262,38 +302,33 @@ citus_add_node(PG_FUNCTION_ARGS) nodeMetadata.shouldHaveShards = false; } - int nodeId = AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, - &nodeAlreadyExists); - TransactionModifiedNodeMetadata = true; - /* - * After adding new node, if the node did not already exist, we will activate - * the node. This means we will replicate all reference tables to the new - * node. + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. 
*/ - if (!nodeAlreadyExists) + if (nodeMetadata.nodeRole == SecondaryNodeRoleId()) { - WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeNameString, nodePort); - - /* - * If the worker is not marked as a coordinator, check that - * the node is not trying to add itself - */ - if (workerNode != NULL && - workerNode->groupId != COORDINATOR_GROUP_ID && - workerNode->nodeRole != SecondaryNodeRoleId() && - IsWorkerTheCurrentNode(workerNode)) - { - ereport(ERROR, (errmsg("Node cannot add itself as a worker."), - errhint( - "Add the node as a coordinator by using: " - "SELECT citus_set_coordinator_host('%s', %d);", - nodeNameString, nodePort))); - } - - ActivateNode(nodeNameString, nodePort); + EnsureTransactionalMetadataSyncMode(); } + if (MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL && + IsMultiStatementTransaction()) + { + /* + * prevent inside transaction block as we use bare connections which can + * lead deadlock + */ + ereport(ERROR, (errmsg("do not add node in transaction block " + "when the sync mode is nontransactional"), + errhint("add the node after SET citus.metadata_sync_mode " + "TO 'transactional'"))); + } + + int nodeId = AddNodeMetadataViaMetadataContext(nodeNameString, nodePort, + &nodeMetadata, + &nodeAlreadyExists); + TransactionModifiedNodeMetadata = true; + PG_RETURN_INT32(nodeId); } @@ -334,8 +369,18 @@ citus_add_inactive_node(PG_FUNCTION_ARGS) ereport(ERROR, (errmsg("coordinator node cannot be added as inactive node"))); } + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (nodeMetadata.nodeRole == SecondaryNodeRoleId()) + { + EnsureTransactionalMetadataSyncMode(); + } + + bool localOnly = false; int nodeId = AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, - &nodeAlreadyExists); + &nodeAlreadyExists, localOnly); TransactionModifiedNodeMetadata = true; PG_RETURN_INT32(nodeId); @@ -378,8 +423,15 @@ citus_add_secondary_node(PG_FUNCTION_ARGS) nodeMetadata.nodeRole = SecondaryNodeRoleId(); nodeMetadata.isActive = true; + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + EnsureTransactionalMetadataSyncMode(); + + bool localOnly = false; int nodeId = AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, - &nodeAlreadyExists); + &nodeAlreadyExists, localOnly); TransactionModifiedNodeMetadata = true; PG_RETURN_INT32(nodeId); @@ -457,6 +509,15 @@ citus_disable_node(PG_FUNCTION_ARGS) ErrorIfCoordinatorMetadataSetFalse(workerNode, BoolGetDatum(isActive), "isactive"); + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } + WorkerNode *firstWorkerNode = GetFirstPrimaryWorkerNode(); bool disablingFirstNode = (firstWorkerNode && firstWorkerNode->nodeId == workerNode->nodeId); @@ -615,6 +676,15 @@ citus_set_node_property(PG_FUNCTION_ARGS) WorkerNode *workerNode = ModifiableWorkerNode(text_to_cstring(nodeNameText), nodePort); + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. 
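+	 *
+	 * Illustrative only (node name/port are placeholders, not part of this change):
+	 * with citus.metadata_sync_mode set to 'nontransactional', a call such as
+	 *
+	 *   SELECT citus_set_node_property('follower-1', 5432, 'shouldhaveshards', false);
+	 *
+	 * against a secondary node is expected to error until the GUC is switched
+	 * back with SET citus.metadata_sync_mode TO 'transactional'.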
+ */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } + if (strcmp(text_to_cstring(propertyText), "shouldhaveshards") == 0) { SetShouldHaveShards(workerNode, value); @@ -642,308 +712,11 @@ master_set_node_property(PG_FUNCTION_ARGS) } -/* - * InterTableRelationshipCommandList returns the command list to - * set up the multiple integrations including - * - * (i) Foreign keys - * (ii) Partionining hierarchy - * - * for each citus table. - */ -static List * -InterTableRelationshipCommandList() -{ - List *distributedTableList = CitusTableList(); - List *propagatedTableList = NIL; - List *multipleTableIntegrationCommandList = NIL; - - CitusTableCacheEntry *cacheEntry = NULL; - foreach_ptr(cacheEntry, distributedTableList) - { - /* - * Skip foreign key and partition creation when we shouldn't need to sync - * tablem metadata or the Citus table is owned by an extension. - */ - if (ShouldSyncTableMetadata(cacheEntry->relationId) && - !IsTableOwnedByExtension(cacheEntry->relationId)) - { - propagatedTableList = lappend(propagatedTableList, cacheEntry); - } - } - - foreach_ptr(cacheEntry, propagatedTableList) - { - Oid relationId = cacheEntry->relationId; - - List *commandListForRelation = - InterTableRelationshipOfRelationCommandList(relationId); - - multipleTableIntegrationCommandList = list_concat( - multipleTableIntegrationCommandList, - commandListForRelation); - } - - multipleTableIntegrationCommandList = lcons(DISABLE_DDL_PROPAGATION, - multipleTableIntegrationCommandList); - multipleTableIntegrationCommandList = lappend(multipleTableIntegrationCommandList, - ENABLE_DDL_PROPAGATION); - - return multipleTableIntegrationCommandList; -} - - -/* - * PgDistTableMetadataSyncCommandList returns the command list to sync the pg_dist_* - * (except pg_dist_node) metadata. We call them as table metadata. 
- */ -List * -PgDistTableMetadataSyncCommandList(void) -{ - List *distributedTableList = CitusTableList(); - List *propagatedTableList = NIL; - List *metadataSnapshotCommandList = NIL; - - /* create the list of tables whose metadata will be created */ - CitusTableCacheEntry *cacheEntry = NULL; - foreach_ptr(cacheEntry, distributedTableList) - { - if (ShouldSyncTableMetadata(cacheEntry->relationId)) - { - propagatedTableList = lappend(propagatedTableList, cacheEntry); - } - } - - /* remove all dist table and object related metadata first */ - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - DELETE_ALL_PARTITIONS); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, DELETE_ALL_SHARDS); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - DELETE_ALL_PLACEMENTS); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - DELETE_ALL_DISTRIBUTED_OBJECTS); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - DELETE_ALL_COLOCATION); - - /* create pg_dist_partition, pg_dist_shard and pg_dist_placement entries */ - foreach_ptr(cacheEntry, propagatedTableList) - { - List *tableMetadataCreateCommandList = - CitusTableMetadataCreateCommandList(cacheEntry->relationId); - - metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList, - tableMetadataCreateCommandList); - } - - /* commands to insert pg_dist_colocation entries */ - List *colocationGroupSyncCommandList = ColocationGroupCreateCommandList(); - metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList, - colocationGroupSyncCommandList); - - List *distributedObjectSyncCommandList = DistributedObjectMetadataSyncCommandList(); - metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList, - distributedObjectSyncCommandList); - - metadataSnapshotCommandList = lcons(DISABLE_DDL_PROPAGATION, - metadataSnapshotCommandList); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - ENABLE_DDL_PROPAGATION); - - return metadataSnapshotCommandList; -} - - -/* - * PropagateNodeWideObjectsCommandList is called during node activation to - * propagate any object that should be propagated for every node. These are - * generally not linked to any distributed object but change system wide behaviour. - */ -static List * -PropagateNodeWideObjectsCommandList() -{ - /* collect all commands */ - List *ddlCommands = NIL; - - if (EnableAlterRoleSetPropagation) - { - /* - * Get commands for database and postgres wide settings. Since these settings are not - * linked to any role that can be distributed we need to distribute them seperately - */ - List *alterRoleSetCommands = GenerateAlterRoleSetCommandForRole(InvalidOid); - ddlCommands = list_concat(ddlCommands, alterRoleSetCommands); - } - - if (list_length(ddlCommands) > 0) - { - /* if there are command wrap them in enable_ddl_propagation off */ - ddlCommands = lcons(DISABLE_DDL_PROPAGATION, ddlCommands); - ddlCommands = lappend(ddlCommands, ENABLE_DDL_PROPAGATION); - } - - return ddlCommands; -} - - -/* - * SyncDistributedObjectsCommandList returns commands to sync object dependencies - * to the given worker node. To be idempotent, it first drops the ones required to be - * dropped. - * - * Object dependencies include: - * - * - All dependencies (e.g., types, schemas, sequences) - * - All shell distributed tables - * - Inter relation between those shell tables - * - Node wide objects - * - * We also update the local group id here, as handling sequence dependencies - * requires it. 
- */ -List * -SyncDistributedObjectsCommandList(WorkerNode *workerNode) -{ - List *commandList = NIL; - - /* - * Propagate node wide objects. It includes only roles for now. - */ - commandList = list_concat(commandList, PropagateNodeWideObjectsCommandList()); - - /* - * Detach partitions, break dependencies between sequences and table then - * remove shell tables first. - */ - commandList = list_concat(commandList, DetachPartitionCommandList()); - commandList = lappend(commandList, BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND); - commandList = lappend(commandList, REMOVE_ALL_SHELL_TABLES_COMMAND); - - /* - * Replicate all objects of the pg_dist_object to the remote node. - */ - commandList = list_concat(commandList, ReplicateAllObjectsToNodeCommandList( - workerNode->workerName, workerNode->workerPort)); - - /* - * After creating each table, handle the inter table relationship between - * those tables. - */ - commandList = list_concat(commandList, InterTableRelationshipCommandList()); - - return commandList; -} - - -/* - * SyncDistributedObjectsToNodeList sync the distributed objects to the node. It includes - * - All dependencies (e.g., types, schemas, sequences) - * - All shell distributed table - * - Inter relation between those shell tables - * - * Note that we do not create the distributed dependencies on the coordinator - * since all the dependencies should be present in the coordinator already. - */ -static void -SyncDistributedObjectsToNodeList(List *workerNodeList) -{ - List *workerNodesToSync = NIL; - WorkerNode *workerNode = NULL; - foreach_ptr(workerNode, workerNodeList) - { - if (NodeIsCoordinator(workerNode)) - { - /* coordinator has all the objects */ - continue; - } - - if (!NodeIsPrimary(workerNode)) - { - /* secondary nodes gets the objects from their primaries via replication */ - continue; - } - - workerNodesToSync = lappend(workerNodesToSync, workerNode); - } - - if (workerNodesToSync == NIL) - { - return; - } - - EnsureSequentialModeMetadataOperations(); - - Assert(ShouldPropagate()); - - List *commandList = SyncDistributedObjectsCommandList(workerNode); - - /* send commands to new workers, the current user should be a superuser */ - Assert(superuser()); - SendMetadataCommandListToWorkerListInCoordinatedTransaction( - workerNodesToSync, - CurrentUserName(), - commandList); -} - - -/* - * UpdateLocalGroupIdOnNode updates local group id on node. - */ -static void -UpdateLocalGroupIdOnNode(WorkerNode *workerNode) -{ - if (NodeIsPrimary(workerNode) && !NodeIsCoordinator(workerNode)) - { - List *commandList = list_make1(LocalGroupIdUpdateCommand(workerNode->groupId)); - - /* send commands to new workers, the current user should be a superuser */ - Assert(superuser()); - SendMetadataCommandListToWorkerListInCoordinatedTransaction( - list_make1(workerNode), - CurrentUserName(), - commandList); - } -} - - -/* - * SyncPgDistTableMetadataToNodeList syncs the pg_dist_partition, pg_dist_shard - * pg_dist_placement and pg_dist_object metadata entries. 
- * - */ -static void -SyncPgDistTableMetadataToNodeList(List *nodeList) -{ - /* send commands to new workers, the current user should be a superuser */ - Assert(superuser()); - - List *nodesWithMetadata = NIL; - WorkerNode *workerNode = NULL; - foreach_ptr(workerNode, nodeList) - { - if (NodeIsPrimary(workerNode) && !NodeIsCoordinator(workerNode)) - { - nodesWithMetadata = lappend(nodesWithMetadata, workerNode); - } - } - - if (nodesWithMetadata == NIL) - { - return; - } - - List *syncPgDistMetadataCommandList = PgDistTableMetadataSyncCommandList(); - SendMetadataCommandListToWorkerListInCoordinatedTransaction( - nodesWithMetadata, - CurrentUserName(), - syncPgDistMetadataCommandList); -} - - /* * ModifiableWorkerNode gets the requested WorkerNode and also gets locks * required for modifying it. This fails if the node does not exist. */ -static WorkerNode * +WorkerNode * ModifiableWorkerNode(const char *nodeName, int32 nodePort) { CheckCitusVersion(ERROR); @@ -972,10 +745,30 @@ citus_activate_node(PG_FUNCTION_ARGS) text *nodeNameText = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); - WorkerNode *workerNode = ModifiableWorkerNode(text_to_cstring(nodeNameText), - nodePort); - ActivateNode(workerNode->workerName, workerNode->workerPort); + char *nodeNameString = text_to_cstring(nodeNameText); + WorkerNode *workerNode = ModifiableWorkerNode(nodeNameString, nodePort); + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } + + /* + * Create MetadataSyncContext which is used throughout nodes' activation. + * It contains activated nodes, bare connections if the mode is nontransactional, + * and a memory context for allocation. + */ + bool collectCommands = false; + bool nodesAddedInSameTransaction = false; + MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(workerNode), + collectCommands, + nodesAddedInSameTransaction); + + ActivateNodeList(context); TransactionModifiedNodeMetadata = true; PG_RETURN_INT32(workerNode->nodeId); @@ -1131,14 +924,145 @@ PrimaryNodeForGroup(int32 groupId, bool *groupContainsNodes) /* - * ActivateNodeList iterates over the nodeList and activates the nodes. - * Some part of the node activation is done parallel across the nodes, - * such as syncing the metadata. However, reference table replication is - * done one by one across nodes. + * MarkNodesNotSyncedInLoopBackConnection unsets metadatasynced flag in separate + * connection to localhost by calling the udf `citus_internal_mark_node_not_synced`. + */ +static void +MarkNodesNotSyncedInLoopBackConnection(MetadataSyncContext *context, + pid_t parentSessionPid) +{ + Assert(context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL); + Assert(!MetadataSyncCollectsCommands(context)); + + /* + * Set metadatasynced to false for all activated nodes to mark the nodes as not synced + * in case nontransactional metadata sync fails before we activate the nodes inside + * metadataSyncContext. + * We set metadatasynced to false at coordinator to mark the nodes as not synced. But we + * do not set isactive and hasmetadata flags to false as we still want to route queries + * to the nodes if their isactive flag is true and propagate DDL to the nodes if possible. + * + * NOTES: + * 1) We use separate connection to localhost as we would rollback the local + * transaction in case of failure. + * 2) Operator should handle problems at workers if any. 
Workers probably fail
+	 *    due to improper metadata when a query hits. Or DDL might fail due to desynced
+	 *    nodes. (when hasmetadata = true, metadatasynced = false)
+	 *    In those cases, proper metadata sync for the workers should be done.
+	 */
+
+	/*
+	 * Because we try to unset metadatasynced flag with a separate transaction,
+	 * we could not find the new node if the node is added in the current local
+	 * transaction. But, hopefully, we do not need to unset metadatasynced for
+	 * the new node as the local transaction would roll back in case of a failure.
+	 */
+	if (context->nodesAddedInSameTransaction)
+	{
+		return;
+	}
+
+	if (context->activatedWorkerNodeList == NIL)
+	{
+		return;
+	}
+
+	int connectionFlag = FORCE_NEW_CONNECTION;
+	MultiConnection *connection = GetNodeConnection(connectionFlag, LocalHostName,
+													PostPortNumber);
+
+	List *commandList = NIL;
+	WorkerNode *workerNode = NULL;
+	foreach_ptr(workerNode, context->activatedWorkerNodeList)
+	{
+		/*
+		 * We need to prevent self deadlock when we access pg_dist_node using separate
+		 * connection to localhost. To achieve this, we check if the caller session's
+		 * pid holds the Exclusive lock on pg_dist_node. After ensuring that (we are
+		 * called from parent session which holds the Exclusive lock), we can safely
+		 * update node metadata by acquiring the relaxed lock.
+		 */
+		StringInfo metadatasyncCommand = makeStringInfo();
+		appendStringInfo(metadatasyncCommand, CITUS_INTERNAL_MARK_NODE_NOT_SYNCED,
+						 parentSessionPid, workerNode->nodeId);
+		commandList = lappend(commandList, metadatasyncCommand->data);
+	}
+
+	SendCommandListToWorkerOutsideTransactionWithConnection(connection, commandList);
+	CloseConnection(connection);
+}
+
+
+/*
+ * SetNodeMetadata sets isactive, metadatasynced and hasmetadata flags locally
+ * and, if required, remotely.
+ */
+static void
+SetNodeMetadata(MetadataSyncContext *context, bool localOnly)
+{
+	/* do not execute local transaction if we collect commands */
+	if (!MetadataSyncCollectsCommands(context))
+	{
+		List *updatedActivatedNodeList = NIL;
+
+		WorkerNode *node = NULL;
+		foreach_ptr(node, context->activatedWorkerNodeList)
+		{
+			node = SetWorkerColumnLocalOnly(node, Anum_pg_dist_node_isactive,
+											BoolGetDatum(true));
+			node = SetWorkerColumnLocalOnly(node, Anum_pg_dist_node_metadatasynced,
+											BoolGetDatum(true));
+			node = SetWorkerColumnLocalOnly(node, Anum_pg_dist_node_hasmetadata,
+											BoolGetDatum(true));
+
+			updatedActivatedNodeList = lappend(updatedActivatedNodeList, node);
+		}
+
+		/* reset activated nodes inside metadataSyncContext after local update */
+		SetMetadataSyncNodesFromNodeList(context, updatedActivatedNodeList);
+	}
+
+	if (!localOnly && EnableMetadataSync)
+	{
+		WorkerNode *node = NULL;
+		foreach_ptr(node, context->activatedWorkerNodeList)
+		{
+			SetNodeStateViaMetadataContext(context, node, BoolGetDatum(true));
+		}
+	}
+}
+
+
+/*
+ * ActivateNodeList does some sanity checks and acquires an Exclusive lock on
+ * pg_dist_node, and then activates the nodes inside the given metadataSyncContext.
+ *
+ * The function operates in 3 different modes according to transactionMode inside
+ * metadataSyncContext.
+ *
+ * 1. MetadataSyncCollectsCommands(context):
+ *      Only collect commands instead of sending them to workers,
+ * 2. context.transactionMode == METADATA_SYNC_TRANSACTIONAL:
+ *      Send all commands using coordinated transaction,
+ * 3. context.transactionMode == METADATA_SYNC_NON_TRANSACTIONAL:
+ *      Send all commands using bare (no transaction block) connections.
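+ *
+ * As a rough usage sketch (illustrative, hostname/port are placeholders and not
+ * part of this change), the mode is chosen via the citus.metadata_sync_mode GUC
+ * on the coordinator:
+ *
+ *   SET citus.metadata_sync_mode TO 'nontransactional';
+ *   SELECT citus_add_node('worker-1', 5432);  -- must run outside a transaction block
+ *
+ * while SET citus.metadata_sync_mode TO 'transactional' makes the same call sync
+ * everything within a single coordinated transaction.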
*/ void -ActivateNodeList(List *nodeList) +ActivateNodeList(MetadataSyncContext *context) { + if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL && + IsMultiStatementTransaction()) + { + /* + * prevent inside transaction block as we use bare connections which can + * lead deadlock + */ + ereport(ERROR, (errmsg("do not sync metadata in transaction block " + "when the sync mode is nontransactional"), + errhint("resync after SET citus.metadata_sync_mode " + "TO 'transactional'"))); + } + /* * We currently require the object propagation to happen via superuser, * see #5139. While activating a node, we sync both metadata and object @@ -1152,122 +1076,86 @@ ActivateNodeList(List *nodeList) */ EnsureSuperUser(); - /* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */ + /* + * Take an exclusive lock on pg_dist_node to serialize pg_dist_node + * changes. + */ LockRelationOid(DistNodeRelationId(), ExclusiveLock); + /* + * Error if there is concurrent change to node table before acquiring + * the lock + */ + ErrorIfAnyNodeNotExist(context->activatedWorkerNodeList); - List *nodeToSyncMetadata = NIL; - WorkerNode *node = NULL; - foreach_ptr(node, nodeList) + /* + * we need to unset metadatasynced flag to false at coordinator in separate + * transaction only at nontransactional sync mode and if we do not collect + * commands. + * + * We make sure we set the flag to false at the start of nontransactional + * metadata sync to mark those nodes are not synced in case of a failure in + * the middle of the sync. + */ + if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL && + !MetadataSyncCollectsCommands(context)) { - /* - * First, locally mark the node is active, if everything goes well, - * we are going to sync this information to all the metadata nodes. - */ - WorkerNode *workerNode = - FindWorkerNodeAnyCluster(node->workerName, node->workerPort); - if (workerNode == NULL) - { - ereport(ERROR, (errmsg("node at \"%s:%u\" does not exist", node->workerName, - node->workerPort))); - } - - /* both nodes should be the same */ - Assert(workerNode->nodeId == node->nodeId); - - /* - * Delete existing reference and replicated table placements on the - * given groupId if the group has been disabled earlier (e.g., isActive - * set to false). - * - * Sync the metadata changes to all existing metadata nodes irrespective - * of the current nodes' metadata sync state. We expect all nodes up - * and running when another node is activated. - */ - if (!workerNode->isActive && NodeIsPrimary(workerNode)) - { - bool localOnly = false; - DeleteAllReplicatedTablePlacementsFromNodeGroup(workerNode->groupId, - localOnly); - } - - workerNode = - SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_isactive, - BoolGetDatum(true)); - - /* TODO: Once all tests will be enabled for MX, we can remove sync by default check */ - bool syncMetadata = EnableMetadataSync && NodeIsPrimary(workerNode); - if (syncMetadata) - { - /* - * We are going to sync the metadata anyway in this transaction, so do - * not fail just because the current metadata is not synced. - */ - SetWorkerColumn(workerNode, Anum_pg_dist_node_metadatasynced, - BoolGetDatum(true)); - - /* - * Update local group id first, as object dependency logic requires to have - * updated local group id. - */ - UpdateLocalGroupIdOnNode(workerNode); - - nodeToSyncMetadata = lappend(nodeToSyncMetadata, workerNode); - } + MarkNodesNotSyncedInLoopBackConnection(context, MyProcPid); } /* - * Sync distributed objects first. 
We must sync distributed objects before - * replicating reference tables to the remote node, as reference tables may - * need such objects. + * Delete existing reference and replicated table placements on the + * given groupId if the group has been disabled earlier (e.g., isActive + * set to false). */ - SyncDistributedObjectsToNodeList(nodeToSyncMetadata); + SendDeletionCommandsForReplicatedTablePlacements(context); /* - * Sync node metadata. We must sync node metadata before syncing table - * related pg_dist_xxx metadata. Since table related metadata requires - * to have right pg_dist_node entries. + * SetNodeMetadata sets isactive, metadatasynced and hasmetadata flags + * locally for following reasons: + * + * 1) Set isactive to true locally so that we can find activated nodes amongst + * active workers, + * 2) Do not fail just because the current metadata is not synced. (see + * ErrorIfAnyMetadataNodeOutOfSync), + * 3) To propagate activated nodes nodemetadata correctly. + * + * We are going to sync the metadata anyway in this transaction, set + * isactive, metadatasynced, and hasmetadata to true locally. + * The changes would rollback in case of failure. */ - foreach_ptr(node, nodeToSyncMetadata) - { - SyncNodeMetadataToNode(node->workerName, node->workerPort); - } + bool localOnly = true; + SetNodeMetadata(context, localOnly); /* - * As the last step, sync the table related metadata to the remote node. - * We must handle it as the last step because of limitations shared with - * above comments. + * Update local group ids so that upcoming transactions can see its effect. + * Object dependency logic requires to have updated local group id. */ - SyncPgDistTableMetadataToNodeList(nodeToSyncMetadata); + UpdateLocalGroupIdsViaMetadataContext(context); - foreach_ptr(node, nodeList) - { - bool isActive = true; + /* + * Sync node metadata so that placement insertion does not fail due to + * EnsureShardPlacementMetadataIsSane. + */ + SyncNodeMetadata(context); - /* finally, let all other active metadata nodes to learn about this change */ - SetNodeState(node->workerName, node->workerPort, isActive); - } -} + /* + * Sync all dependencies and distributed objects with their pg_dist_xx tables to + * metadata nodes inside metadataSyncContext. Depends on node metadata. + */ + SyncDistributedObjects(context); - -/* - * ActivateNode activates the node with nodeName and nodePort. Currently, activation - * includes only replicating the reference tables and setting isactive column of the - * given node. - */ -int -ActivateNode(char *nodeName, int nodePort) -{ - bool isActive = true; - - WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort); - ActivateNodeList(list_make1(workerNode)); - - /* finally, let all other active metadata nodes to learn about this change */ - WorkerNode *newWorkerNode = SetNodeState(nodeName, nodePort, isActive); - Assert(newWorkerNode->nodeId == workerNode->nodeId); - - return newWorkerNode->nodeId; + /* + * Let all nodes to be active and synced after all operations succeeded. + * we make sure that the metadata sync is idempotent and safe overall with multiple + * other transactions, if nontransactional mode is used. + * + * We already took Exclusive lock on node metadata, which prevents modification + * on node metadata on coordinator. The step will rollback, in case of a failure, + * to the state where metadatasynced=false. 
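+	 *
+	 * Illustrative, not part of this change: after a failed nontransactional sync,
+	 * the intermediate state is visible from the coordinator with e.g.
+	 *
+	 *   SELECT nodename, nodeport, isactive, hasmetadata, metadatasynced
+	 *   FROM pg_dist_node;
+	 *
+	 * and re-running node activation is the way to converge it again.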
+ */ + localOnly = false; + SetNodeMetadata(context, localOnly); } @@ -1328,6 +1216,14 @@ citus_update_node(PG_FUNCTION_ARGS) errmsg("node %u not found", nodeId))); } + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } /* * If the node is a primary node we block reads and writes. @@ -1536,7 +1432,7 @@ get_shard_id_for_distribution_column(PG_FUNCTION_ARGS) errmsg("relation is not distributed"))); } - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { List *shardIntervalList = LoadShardIntervalList(relationId); if (shardIntervalList == NIL) @@ -1672,6 +1568,98 @@ citus_is_coordinator(PG_FUNCTION_ARGS) } +/* + * EnsureParentSessionHasExclusiveLockOnPgDistNode ensures given session id + * holds Exclusive lock on pg_dist_node. + */ +static void +EnsureParentSessionHasExclusiveLockOnPgDistNode(pid_t parentSessionPid) +{ + StringInfo checkIfParentLockCommandStr = makeStringInfo(); + + int spiConnectionResult = SPI_connect(); + if (spiConnectionResult != SPI_OK_CONNECT) + { + ereport(ERROR, (errmsg("could not connect to SPI manager"))); + } + + char *checkIfParentLockCommand = "SELECT pid FROM pg_locks WHERE " + "pid = %d AND database = %d AND relation = %d AND " + "mode = 'ExclusiveLock' AND granted = TRUE"; + appendStringInfo(checkIfParentLockCommandStr, checkIfParentLockCommand, + parentSessionPid, MyDatabaseId, DistNodeRelationId()); + + bool readOnly = true; + int spiQueryResult = SPI_execute(checkIfParentLockCommandStr->data, readOnly, 0); + if (spiQueryResult != SPI_OK_SELECT) + { + ereport(ERROR, (errmsg("execution was not successful \"%s\"", + checkIfParentLockCommandStr->data))); + } + + bool parentHasExclusiveLock = SPI_processed > 0; + + SPI_finish(); + + if (!parentHasExclusiveLock) + { + ereport(ERROR, (errmsg("lock is not held by the caller. Unexpected caller " + "for citus_internal_mark_node_not_synced"))); + } +} + + +/* + * citus_internal_mark_node_not_synced unsets metadatasynced flag in separate connection + * to localhost. Should only be called by `MarkNodesNotSyncedInLoopBackConnection`. + * See it for details. + */ +Datum +citus_internal_mark_node_not_synced(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + + /* only called by superuser */ + EnsureSuperUser(); + + pid_t parentSessionPid = PG_GETARG_INT32(0); + + /* fetch node by id */ + int nodeId = PG_GETARG_INT32(1); + HeapTuple heapTuple = GetNodeByNodeId(nodeId); + + /* ensure that parent session holds Exclusive lock to pg_dist_node */ + EnsureParentSessionHasExclusiveLockOnPgDistNode(parentSessionPid); + + /* + * We made sure parent session holds the ExclusiveLock, so we can unset + * metadatasynced for the node safely with the relaxed lock here. 
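+	 *
+	 * For reference (values are placeholders), the loopback connection built in
+	 * MarkNodesNotSyncedInLoopBackConnection ends up issuing something of the form
+	 *
+	 *   SELECT citus_internal_mark_node_not_synced(<parent backend pid>, <node id>);
+	 *
+	 * which is why the pg_locks check above keys on that pid.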
+ */ + Relation pgDistNode = table_open(DistNodeRelationId(), AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode); + + Datum values[Natts_pg_dist_node]; + bool isnull[Natts_pg_dist_node]; + bool replace[Natts_pg_dist_node]; + + memset(replace, 0, sizeof(replace)); + values[Anum_pg_dist_node_metadatasynced - 1] = DatumGetBool(false); + isnull[Anum_pg_dist_node_metadatasynced - 1] = false; + replace[Anum_pg_dist_node_metadatasynced - 1] = true; + + heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, replace); + + CatalogTupleUpdate(pgDistNode, &heapTuple->t_self, heapTuple); + + CitusInvalidateRelcacheByRelid(DistNodeRelationId()); + CommandCounterIncrement(); + + table_close(pgDistNode, NoLock); + + PG_RETURN_VOID(); +} + + /* * FindWorkerNode searches over the worker nodes and returns the workerNode * if it already exists. Else, the function returns NULL. @@ -1874,6 +1862,16 @@ static void RemoveNodeFromCluster(char *nodeName, int32 nodePort) { WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort); + + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } + if (NodeIsPrimary(workerNode)) { ErrorIfNodeContainsNonRemovablePlacements(workerNode); @@ -1918,6 +1916,10 @@ ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode) { int32 groupId = workerNode->groupId; List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId); + + /* sort the list to prevent regression tests getting flaky */ + shardPlacements = SortList(shardPlacements, CompareGroupShardPlacements); + GroupShardPlacement *placement = NULL; foreach_ptr(placement, shardPlacements) { @@ -1998,12 +2000,11 @@ CountPrimariesWithMetadata(void) * If not, the following procedure is followed while adding a node: If the groupId is not * explicitly given by the user, the function picks the group that the new node should * be in with respect to GroupSize. Then, the new node is inserted into the local - * pg_dist_node as well as the nodes with hasmetadata=true. + * pg_dist_node as well as the nodes with hasmetadata=true if localOnly is false. */ static int -AddNodeMetadata(char *nodeName, int32 nodePort, - NodeMetadata *nodeMetadata, - bool *nodeAlreadyExists) +AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata *nodeMetadata, + bool *nodeAlreadyExists, bool localOnly) { EnsureCoordinator(); @@ -2132,7 +2133,7 @@ AddNodeMetadata(char *nodeName, int32 nodePort, workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); - if (EnableMetadataSync) + if (EnableMetadataSync && !localOnly) { /* send the delete command to all primary nodes with metadata */ char *nodeDeleteCommand = NodeDeleteCommand(workerNode->nodeId); @@ -2153,6 +2154,93 @@ AddNodeMetadata(char *nodeName, int32 nodePort, } +/* + * AddNodeMetadataViaMetadataContext does the same thing as AddNodeMetadata but + * make use of metadata sync context to send commands to workers to support both + * transactional and nontransactional sync modes. 
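+ *
+ * As a sketch of the caller-visible flow (hostname/port are placeholders), a plain
+ *
+ *   SELECT citus_add_node('worker-2', 5432);
+ *
+ * now inserts the row into the local pg_dist_node first and then pushes the
+ * delete/insert and activation commands through a single MetadataSyncContext.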
+ */ +static int +AddNodeMetadataViaMetadataContext(char *nodeName, int32 nodePort, + NodeMetadata *nodeMetadata, bool *nodeAlreadyExists) +{ + bool localOnly = true; + int nodeId = AddNodeMetadata(nodeName, nodePort, nodeMetadata, nodeAlreadyExists, + localOnly); + + /* do nothing as the node already exists */ + if (*nodeAlreadyExists) + { + return nodeId; + } + + /* + * Create metadata sync context that is used throughout node addition + * and activation if necessary. + */ + WorkerNode *node = ModifiableWorkerNode(nodeName, nodePort); + + /* we should always set active flag to true if we call citus_add_node */ + node = SetWorkerColumnLocalOnly(node, Anum_pg_dist_node_isactive, DatumGetBool(true)); + + /* + * After adding new node, if the node did not already exist, we will activate + * the node. + * If the worker is not marked as a coordinator, check that + * the node is not trying to add itself + */ + if (node != NULL && + node->groupId != COORDINATOR_GROUP_ID && + node->nodeRole != SecondaryNodeRoleId() && + IsWorkerTheCurrentNode(node)) + { + ereport(ERROR, (errmsg("Node cannot add itself as a worker."), + errhint( + "Add the node as a coordinator by using: " + "SELECT citus_set_coordinator_host('%s', %d);", + node->workerName, node->workerPort))); + } + + List *nodeList = list_make1(node); + bool collectCommands = false; + bool nodesAddedInSameTransaction = true; + MetadataSyncContext *context = CreateMetadataSyncContext(nodeList, collectCommands, + nodesAddedInSameTransaction); + + if (EnableMetadataSync) + { + /* send the delete command to all primary nodes with metadata */ + char *nodeDeleteCommand = NodeDeleteCommand(node->nodeId); + SendOrCollectCommandListToMetadataNodes(context, list_make1(nodeDeleteCommand)); + + /* finally prepare the insert command and send it to all primary nodes */ + uint32 primariesWithMetadata = CountPrimariesWithMetadata(); + if (primariesWithMetadata != 0) + { + char *nodeInsertCommand = NULL; + if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL) + { + nodeInsertCommand = NodeListInsertCommand(nodeList); + } + else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + /* + * We need to ensure node insertion is idempotent in nontransactional + * sync mode. + */ + nodeInsertCommand = NodeListIdempotentInsertCommand(nodeList); + } + Assert(nodeInsertCommand != NULL); + SendOrCollectCommandListToMetadataNodes(context, + list_make1(nodeInsertCommand)); + } + } + + ActivateNodeList(context); + + return nodeId; +} + + /* * SetWorkerColumn function sets the column with the specified index * on the worker in pg_dist_node, by calling SetWorkerColumnLocalOnly. @@ -2177,6 +2265,30 @@ SetWorkerColumn(WorkerNode *workerNode, int columnIndex, Datum value) } +/* + * SetNodeStateViaMetadataContext sets or unsets isactive, metadatasynced, and hasmetadata + * flags via metadataSyncContext. 
+ */ +static void +SetNodeStateViaMetadataContext(MetadataSyncContext *context, WorkerNode *workerNode, + Datum value) +{ + char *isActiveCommand = + GetMetadataSyncCommandToSetNodeColumn(workerNode, Anum_pg_dist_node_isactive, + value); + char *metadatasyncedCommand = + GetMetadataSyncCommandToSetNodeColumn(workerNode, + Anum_pg_dist_node_metadatasynced, value); + char *hasmetadataCommand = + GetMetadataSyncCommandToSetNodeColumn(workerNode, Anum_pg_dist_node_hasmetadata, + value); + List *commandList = list_make3(isActiveCommand, metadatasyncedCommand, + hasmetadataCommand); + + SendOrCollectCommandListToMetadataNodes(context, commandList); +} + + /* * SetWorkerColumnOptional function sets the column with the specified index * on the worker in pg_dist_node, by calling SetWorkerColumnLocalOnly. @@ -2389,20 +2501,6 @@ SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards) } -/* - * SetNodeState function sets the isactive column of the specified worker in - * pg_dist_node to isActive. Also propagates this to other metadata nodes. - * It returns the new worker node after the modification. - */ -static WorkerNode * -SetNodeState(char *nodeName, int nodePort, bool isActive) -{ - WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); - return SetWorkerColumn(workerNode, Anum_pg_dist_node_isactive, BoolGetDatum( - isActive)); -} - - /* * GetNodeTuple function returns the heap tuple of given nodeName and nodePort. If the * node is not found this function returns NULL. @@ -2439,6 +2537,41 @@ GetNodeTuple(const char *nodeName, int32 nodePort) } +/* + * GetNodeByNodeId returns the heap tuple for given node id by looking up catalog. + */ +static HeapTuple +GetNodeByNodeId(int32 nodeId) +{ + Relation pgDistNode = table_open(DistNodeRelationId(), AccessShareLock); + const int scanKeyCount = 1; + const bool indexOK = false; + + ScanKeyData scanKey[1]; + HeapTuple nodeTuple = NULL; + + ScanKeyInit(&scanKey[0], Anum_pg_dist_node_nodeid, + BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(nodeId)); + SysScanDesc scanDescriptor = systable_beginscan(pgDistNode, InvalidOid, indexOK, + NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + nodeTuple = heap_copytuple(heapTuple); + } + else + { + ereport(ERROR, (errmsg("could not find valid entry for node id %d", nodeId))); + } + + systable_endscan(scanDescriptor); + table_close(pgDistNode, NoLock); + + return nodeTuple; +} + + /* * GetNextGroupId allocates and returns a unique groupId for the group * to be created. This allocation occurs both in shared memory and in write @@ -2579,9 +2712,11 @@ InsertPlaceholderCoordinatorRecord(void) nodeMetadata.nodeCluster = "default"; bool nodeAlreadyExists = false; + bool localOnly = false; /* as long as there is a single node, localhost should be ok */ - AddNodeMetadata(LocalHostName, PostPortNumber, &nodeMetadata, &nodeAlreadyExists); + AddNodeMetadata(LocalHostName, PostPortNumber, &nodeMetadata, &nodeAlreadyExists, + localOnly); } @@ -2864,3 +2999,119 @@ UnsetMetadataSyncedForAllWorkers(void) return updatedAtLeastOne; } + + +/* + * ErrorIfAnyNodeNotExist errors if any node in given list not found. + */ +static void +ErrorIfAnyNodeNotExist(List *nodeList) +{ + WorkerNode *node = NULL; + foreach_ptr(node, nodeList) + { + /* + * First, locally mark the node is active, if everything goes well, + * we are going to sync this information to all the metadata nodes. 
+ */ + WorkerNode *workerNode = + FindWorkerNodeAnyCluster(node->workerName, node->workerPort); + if (workerNode == NULL) + { + ereport(ERROR, (errmsg("node at \"%s:%u\" does not exist", node->workerName, + node->workerPort))); + } + } +} + + +/* + * UpdateLocalGroupIdsViaMetadataContext updates local group ids for given list + * of nodes with transactional or nontransactional mode according to transactionMode + * inside metadataSyncContext. + */ +static void +UpdateLocalGroupIdsViaMetadataContext(MetadataSyncContext *context) +{ + int activatedPrimaryCount = list_length(context->activatedWorkerNodeList); + int nodeIdx = 0; + for (nodeIdx = 0; nodeIdx < activatedPrimaryCount; nodeIdx++) + { + WorkerNode *node = list_nth(context->activatedWorkerNodeList, nodeIdx); + List *commandList = list_make1(LocalGroupIdUpdateCommand(node->groupId)); + + /* send commands to new workers, the current user should be a superuser */ + Assert(superuser()); + + SendOrCollectCommandListToSingleNode(context, commandList, nodeIdx); + } +} + + +/* + * SendDeletionCommandsForReplicatedTablePlacements sends commands to delete replicated + * placement for the metadata nodes with transactional or nontransactional mode according + * to transactionMode inside metadataSyncContext. + */ +static void +SendDeletionCommandsForReplicatedTablePlacements(MetadataSyncContext *context) +{ + WorkerNode *node = NULL; + foreach_ptr(node, context->activatedWorkerNodeList) + { + if (!node->isActive) + { + bool localOnly = false; + int32 groupId = node->groupId; + DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext(context, + groupId, + localOnly); + } + } +} + + +/* + * SyncNodeMetadata syncs node metadata with transactional or nontransactional + * mode according to transactionMode inside metadataSyncContext. + */ +static void +SyncNodeMetadata(MetadataSyncContext *context) +{ + CheckCitusVersion(ERROR); + + if (!EnableMetadataSync) + { + return; + } + + /* + * Do not fail when we call this method from activate_node_snapshot + * from workers. + */ + if (!MetadataSyncCollectsCommands(context)) + { + EnsureCoordinator(); + } + + EnsureModificationsCanRun(); + EnsureSequentialModeMetadataOperations(); + + LockRelationOid(DistNodeRelationId(), ExclusiveLock); + + /* generate the queries which drop the node metadata */ + List *dropMetadataCommandList = NodeMetadataDropCommands(); + + /* generate the queries which create the node metadata from scratch */ + List *createMetadataCommandList = NodeMetadataCreateCommands(); + + List *recreateNodeSnapshotCommandList = dropMetadataCommandList; + recreateNodeSnapshotCommandList = list_concat(recreateNodeSnapshotCommandList, + createMetadataCommandList); + + /* + * We should have already added node metadata to metadata workers. Sync node + * metadata just for activated workers. 
+ */ + SendOrCollectCommandListToActivatedNodes(context, recreateNodeSnapshotCommandList); +} diff --git a/src/backend/distributed/metadata/pg_get_object_address_13_14_15.c b/src/backend/distributed/metadata/pg_get_object_address_13_14_15.c index 00c2da620..bcd74fbbc 100644 --- a/src/backend/distributed/metadata/pg_get_object_address_13_14_15.c +++ b/src/backend/distributed/metadata/pg_get_object_address_13_14_15.c @@ -425,6 +425,7 @@ ErrorIfCurrentUserCanNotDistributeObject(char *textType, ObjectType type, case OBJECT_COLLATION: case OBJECT_VIEW: case OBJECT_ROLE: + case OBJECT_PUBLICATION: { check_object_ownership(userId, type, *addr, node, *relation); break; diff --git a/src/backend/distributed/operations/create_shards.c b/src/backend/distributed/operations/create_shards.c index 282993d7b..3edab94e9 100644 --- a/src/backend/distributed/operations/create_shards.c +++ b/src/backend/distributed/operations/create_shards.c @@ -215,6 +215,7 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool { bool colocatedShard = true; List *insertedShardPlacements = NIL; + List *insertedShardIds = NIL; /* make sure that tables are hash partitioned */ CheckHashPartitionedTable(targetRelationId); @@ -254,7 +255,9 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool foreach_ptr(sourceShardInterval, sourceShardIntervalList) { uint64 sourceShardId = sourceShardInterval->shardId; - uint64 newShardId = GetNextShardId(); + uint64 *newShardIdPtr = (uint64 *) palloc0(sizeof(uint64)); + *newShardIdPtr = GetNextShardId(); + insertedShardIds = lappend(insertedShardIds, newShardIdPtr); int32 shardMinValue = DatumGetInt32(sourceShardInterval->minValue); int32 shardMaxValue = DatumGetInt32(sourceShardInterval->maxValue); @@ -263,7 +266,7 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool List *sourceShardPlacementList = ShardPlacementListSortedByWorker( sourceShardId); - InsertShardRow(targetRelationId, newShardId, targetShardStorageType, + InsertShardRow(targetRelationId, *newShardIdPtr, targetShardStorageType, shardMinValueText, shardMaxValueText); ShardPlacement *sourcePlacement = NULL; @@ -272,21 +275,26 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool int32 groupId = sourcePlacement->groupId; const uint64 shardSize = 0; - /* - * Optimistically add shard placement row the pg_dist_shard_placement, in case - * of any error it will be roll-backed. - */ - uint64 shardPlacementId = InsertShardPlacementRow(newShardId, - INVALID_PLACEMENT_ID, - shardSize, - groupId); - - ShardPlacement *shardPlacement = LoadShardPlacement(newShardId, - shardPlacementId); - insertedShardPlacements = lappend(insertedShardPlacements, shardPlacement); + InsertShardPlacementRow(*newShardIdPtr, + INVALID_PLACEMENT_ID, + shardSize, + groupId); } } + /* + * load shard placements for the shard at once after all placement insertions + * finished. That prevents MetadataCache from rebuilding unnecessarily after + * each placement insertion. 
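+	 *
+	 * Illustrative example (table names are made up): this is the path taken by
+	 * colocated shard creation such as
+	 *
+	 *   SELECT create_distributed_table('orders', 'customer_id',
+	 *                                   colocate_with => 'customers');
+	 *
+	 * where one cache rebuild per placement would otherwise add up quickly.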
+ */ + uint64 *shardIdPtr; + foreach_ptr(shardIdPtr, insertedShardIds) + { + List *placementsForShard = ShardPlacementList(*shardIdPtr); + insertedShardPlacements = list_concat(insertedShardPlacements, + placementsForShard); + } + CreateShardsOnWorkers(targetRelationId, insertedShardPlacements, useExclusiveConnections, colocatedShard); } diff --git a/src/backend/distributed/operations/node_protocol.c b/src/backend/distributed/operations/node_protocol.c index 172a2a303..dca9906a6 100644 --- a/src/backend/distributed/operations/node_protocol.c +++ b/src/backend/distributed/operations/node_protocol.c @@ -461,10 +461,7 @@ ResolveRelationId(text *relationName, bool missingOk) * definition, optional column storage and statistics definitions, and index * constraint and trigger definitions. * When IncludeIdentities is NO_IDENTITY, the function does not include identity column - * specifications. When it's INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, the function - * uses sequences and set them as default values for identity columns by using exactly - * the same approach with worker_nextval('sequence') & nextval('sequence') logic - * desribed above. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses. + * specifications. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses. */ List * GetFullTableCreationCommands(Oid relationId, @@ -500,6 +497,15 @@ GetFullTableCreationCommands(Oid relationId, tableDDLEventList = lappend(tableDDLEventList, truncateTriggerCommand); } + + /* + * For identity column sequences, we only need to modify + * their min/max values to produce unique values on the worker nodes. + */ + List *identitySequenceDependencyCommandList = + IdentitySequenceDependencyCommandList(relationId); + tableDDLEventList = list_concat(tableDDLEventList, + identitySequenceDependencyCommandList); } tableDDLEventList = list_concat(tableDDLEventList, postLoadCreationCommandList); diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index d24936925..5d30ff8be 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -190,6 +190,19 @@ typedef struct WorkerShardStatistics HTAB *statistics; } WorkerShardStatistics; +/* ShardMoveDependencyHashEntry contains the taskId which any new shard move task within the corresponding colocation group must take a dependency on */ +typedef struct ShardMoveDependencyInfo +{ + int64 key; + int64 taskId; +} ShardMoveDependencyInfo; + +typedef struct ShardMoveDependencies +{ + HTAB *colocationDependencies; + HTAB *nodeDependencies; +} ShardMoveDependencies; + char *VariablesToBePassedToNewConnections = NULL; /* static declarations for main logic */ @@ -475,6 +488,7 @@ GetRebalanceSteps(RebalanceOptions *options) /* sort the lists to make the function more deterministic */ List *activeWorkerList = SortedActiveWorkers(); List *activeShardPlacementListList = NIL; + List *unbalancedShards = NIL; Oid relationId = InvalidOid; foreach_oid(relationId, options->relationIdList) @@ -490,8 +504,29 @@ GetRebalanceSteps(RebalanceOptions *options) shardPlacementList, options->workerNode); } - activeShardPlacementListList = - lappend(activeShardPlacementListList, activeShardPlacementListForRelation); + if (list_length(activeShardPlacementListForRelation) >= list_length( + activeWorkerList)) + { + activeShardPlacementListList = lappend(activeShardPlacementListList, + activeShardPlacementListForRelation); + } + else + { + 
/* + * If the number of shard groups are less than the number of worker nodes, + * at least one of the worker nodes will remain empty. For such cases, + * we consider those shard groups as a colocation group and try to + * distribute them across the cluster. + */ + unbalancedShards = list_concat(unbalancedShards, + activeShardPlacementListForRelation); + } + } + + if (list_length(unbalancedShards) > 0) + { + activeShardPlacementListList = lappend(activeShardPlacementListList, + unbalancedShards); } if (options->threshold < options->rebalanceStrategy->minimumThreshold) @@ -1796,10 +1831,10 @@ static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) { char transferMode = LookupShardTransferMode(shardReplicationModeOid); - EnsureReferenceTablesExistOnAllNodesExtended(transferMode); if (list_length(options->relationIdList) == 0) { + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); return; } @@ -1814,6 +1849,25 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) List *placementUpdateList = GetRebalanceSteps(options); + if (transferMode == TRANSFER_MODE_AUTOMATIC) + { + /* + * If the shard transfer mode is set to auto, we should check beforehand + * if we are able to use logical replication to transfer shards or not. + * We throw an error if any of the tables do not have a replica identity, which + * is required for logical replication to replicate UPDATE and DELETE commands. + */ + PlacementUpdateEvent *placementUpdate = NULL; + foreach_ptr(placementUpdate, placementUpdateList) + { + Oid relationId = RelationIdForShard(placementUpdate->shardId); + List *colocatedTableList = ColocatedTableList(relationId); + VerifyTablesHaveReplicaIdentity(colocatedTableList); + } + } + + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); + if (list_length(placementUpdateList) == 0) { return; @@ -1857,6 +1911,137 @@ ErrorOnConcurrentRebalance(RebalanceOptions *options) } +/* + * GetColocationId function returns the colocationId of the shard in a PlacementUpdateEvent. + */ +static int64 +GetColocationId(PlacementUpdateEvent *move) +{ + ShardInterval *shardInterval = LoadShardInterval(move->shardId); + + CitusTableCacheEntry *citusTableCacheEntry = GetCitusTableCacheEntry( + shardInterval->relationId); + + return citusTableCacheEntry->colocationId; +} + + +/* + * InitializeShardMoveDependencies function creates the hash maps that we use to track + * the latest moves so that subsequent moves with the same properties must take a dependency + * on them. There are two hash maps. One is for tracking the latest move scheduled in a + * given colocation group and the other one is for tracking the latest move which involves + * a given node either as its source node or its target node. + */ +static ShardMoveDependencies +InitializeShardMoveDependencies() +{ + ShardMoveDependencies shardMoveDependencies; + shardMoveDependencies.colocationDependencies = CreateSimpleHashWithNameAndSize(int64, + ShardMoveDependencyInfo, + "colocationDependencyHashMap", + 6); + shardMoveDependencies.nodeDependencies = CreateSimpleHashWithNameAndSize(int64, + ShardMoveDependencyInfo, + "nodeDependencyHashMap", + 6); + + return shardMoveDependencies; +} + + +/* + * GenerateTaskMoveDependencyList creates and returns a List of taskIds that + * the move must take a dependency on. 
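+ *
+ * Illustrative: these dependencies order the background tasks scheduled by
+ * something like
+ *
+ *   SELECT citus_rebalance_start(shard_transfer_mode := 'force_logical');
+ *
+ * so that moves touching the same colocation group or the same node never run
+ * concurrently.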
+ */ +static int64 * +GenerateTaskMoveDependencyList(PlacementUpdateEvent *move, int64 colocationId, + ShardMoveDependencies shardMoveDependencies, int *nDepends) +{ + HTAB *dependsList = CreateSimpleHashSetWithNameAndSize(int64, + "shardMoveDependencyList", 0); + + bool found; + + /* Check if there exists a move in the same colocation group scheduled earlier. */ + ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search( + shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER, &found); + + if (found) + { + hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL); + } + + /* Check if there exists a move scheduled earlier whose source or target node + * overlaps with the current move's source node. */ + shardMoveDependencyInfo = hash_search( + shardMoveDependencies.nodeDependencies, &move->sourceNode->nodeId, HASH_ENTER, + &found); + + if (found) + { + hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL); + } + + /* Check if there exists a move scheduled earlier whose source or target node + * overlaps with the current move's target node. */ + shardMoveDependencyInfo = hash_search( + shardMoveDependencies.nodeDependencies, &move->targetNode->nodeId, HASH_ENTER, + &found); + + + if (found) + { + hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL); + } + + *nDepends = hash_get_num_entries(dependsList); + + int64 *dependsArray = NULL; + + if (*nDepends > 0) + { + HASH_SEQ_STATUS seq; + + dependsArray = palloc((*nDepends) * sizeof(int64)); + + hash_seq_init(&seq, dependsList); + int i = 0; + int64 *dependsTaskId; + + while ((dependsTaskId = (int64 *) hash_seq_search(&seq)) != NULL) + { + dependsArray[i++] = *dependsTaskId; + } + } + + return dependsArray; +} + + +/* + * UpdateShardMoveDependencies function updates the dependency maps with the latest move's taskId. + */ +static void +UpdateShardMoveDependencies(PlacementUpdateEvent *move, uint64 colocationId, int64 taskId, + ShardMoveDependencies shardMoveDependencies) +{ + ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search( + shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER, NULL); + shardMoveDependencyInfo->taskId = taskId; + + shardMoveDependencyInfo = hash_search(shardMoveDependencies.nodeDependencies, + &move->sourceNode->nodeId, HASH_ENTER, NULL); + + shardMoveDependencyInfo->taskId = taskId; + + shardMoveDependencyInfo = hash_search(shardMoveDependencies.nodeDependencies, + &move->targetNode->nodeId, HASH_ENTER, NULL); + + shardMoveDependencyInfo->taskId = taskId; +} + + /* * RebalanceTableShardsBackground rebalances the shards for the relations * inside the relationIdList across the different workers. It does so using our @@ -1894,12 +2079,6 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo EnsureTableOwner(colocatedTableId); } - if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) - { - /* make sure that all tables included in the rebalance have a replica identity*/ - VerifyTablesHaveReplicaIdentity(colocatedTableList); - } - List *placementUpdateList = GetRebalanceSteps(options); if (list_length(placementUpdateList) == 0) @@ -1908,6 +2087,23 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo return 0; } + if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) + { + /* + * If the shard transfer mode is set to auto, we should check beforehand + * if we are able to use logical replication to transfer shards or not. 
+ * We throw an error if any of the tables do not have a replica identity, which + * is required for logical replication to replicate UPDATE and DELETE commands. + */ + PlacementUpdateEvent *placementUpdate = NULL; + foreach_ptr(placementUpdate, placementUpdateList) + { + relationId = RelationIdForShard(placementUpdate->shardId); + List *colocatedTables = ColocatedTableList(relationId); + VerifyTablesHaveReplicaIdentity(colocatedTables); + } + } + DropOrphanedResourcesInSeparateTransaction(); /* find the name of the shard transfer mode to interpolate in the scheduled command */ @@ -1922,18 +2118,8 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo StringInfoData buf = { 0 }; initStringInfo(&buf); - /* - * Currently we only have two tasks that any move can depend on: - * - replicating reference tables - * - the previous move - * - * prevJobIdx tells what slot to write the id of the task into. We only use both slots - * if we are actually replicating reference tables. - */ - int64 prevJobId[2] = { 0 }; - int prevJobIdx = 0; - List *referenceTableIdList = NIL; + int64 replicateRefTablesTaskId = 0; if (HasNodesWithMissingReferenceTables(&referenceTableIdList)) { @@ -1949,15 +2135,15 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo appendStringInfo(&buf, "SELECT pg_catalog.replicate_reference_tables(%s)", quote_literal_cstr(shardTranferModeLabel)); - BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, - prevJobIdx, prevJobId); - prevJobId[prevJobIdx] = task->taskid; - prevJobIdx++; + BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, 0, + NULL); + replicateRefTablesTaskId = task->taskid; } PlacementUpdateEvent *move = NULL; - bool first = true; - int prevMoveIndex = prevJobIdx; + + ShardMoveDependencies shardMoveDependencies = InitializeShardMoveDependencies(); + foreach_ptr(move, placementUpdateList) { resetStringInfo(&buf); @@ -1969,14 +2155,27 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo move->targetNode->nodeId, quote_literal_cstr(shardTranferModeLabel)); - BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, - prevJobIdx, prevJobId); - prevJobId[prevMoveIndex] = task->taskid; - if (first) + int64 colocationId = GetColocationId(move); + + int nDepends = 0; + + int64 *dependsArray = GenerateTaskMoveDependencyList(move, colocationId, + shardMoveDependencies, + &nDepends); + + if (nDepends == 0 && replicateRefTablesTaskId > 0) { - first = false; - prevJobIdx++; + nDepends = 1; + dependsArray = palloc(nDepends * sizeof(int64)); + dependsArray[0] = replicateRefTablesTaskId; } + + BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, + nDepends, + dependsArray); + + UpdateShardMoveDependencies(move, colocationId, task->taskid, + shardMoveDependencies); } ereport(NOTICE, diff --git a/src/backend/distributed/operations/shard_transfer.c b/src/backend/distributed/operations/shard_transfer.c index babfe7bf5..abaa00251 100644 --- a/src/backend/distributed/operations/shard_transfer.c +++ b/src/backend/distributed/operations/shard_transfer.c @@ -70,22 +70,43 @@ typedef struct ShardCommandList List *ddlCommandList; } ShardCommandList; +static const char *ShardTransferTypeNames[] = { + [SHARD_TRANSFER_INVALID_FIRST] = "unknown", + [SHARD_TRANSFER_MOVE] = "move", + [SHARD_TRANSFER_COPY] = "copy", +}; + +static const char *ShardTransferTypeNamesCapitalized[] = { + [SHARD_TRANSFER_INVALID_FIRST] = "unknown", + 
[SHARD_TRANSFER_MOVE] = "Move", + [SHARD_TRANSFER_COPY] = "Copy", +}; + +static const char *ShardTransferTypeNamesContinuous[] = { + [SHARD_TRANSFER_INVALID_FIRST] = "unknown", + [SHARD_TRANSFER_MOVE] = "Moving", + [SHARD_TRANSFER_COPY] = "Copying", +}; + +static const char *ShardTransferTypeFunctionNames[] = { + [SHARD_TRANSFER_INVALID_FIRST] = "unknown", + [SHARD_TRANSFER_MOVE] = "citus_move_shard_placement", + [SHARD_TRANSFER_COPY] = "citus_copy_shard_placement", +}; + /* local function forward declarations */ static bool CanUseLogicalReplication(Oid relationId, char shardReplicationMode); static void ErrorIfTableCannotBeReplicated(Oid relationId); -static void ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName, - int targetNodePort); +static void ErrorIfTargetNodeIsNotSafeForTransfer(const char *targetNodeName, + int targetNodePort, + ShardTransferType transferType); static void ErrorIfSameNode(char *sourceNodeName, int sourceNodePort, char *targetNodeName, int targetNodePort, const char *operationName); -static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, - int32 sourceNodePort, char *targetNodeName, - int32 targetNodePort, - char shardReplicationMode); static void CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort, char *targetNodeName, int32 targetNodePort, bool useLogicalReplication, - char *operationName); + const char *operationName); static void CopyShardTablesViaLogicalReplication(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort, @@ -100,7 +121,7 @@ static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 targetNodePort); static List * RecreateTableDDLCommandList(Oid relationId); static void EnsureTableListOwner(List *tableIdList); -static void EnsureTableListSuitableForReplication(List *tableIdList); +static void ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList); static void DropShardPlacementsFromMetadata(List *shardList, char *nodeName, @@ -112,12 +133,28 @@ static void UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId, int32 targetNodePort); static bool IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetPort); +static void SetupRebalanceMonitorForShardTransfer(uint64 shardId, Oid distributedTableId, + char *sourceNodeName, + uint32 sourceNodePort, + char *targetNodeName, + uint32 targetNodePort, + ShardTransferType transferType); static void CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes); +static void EnsureAllShardsCanBeCopied(List *colocatedShardList, + char *sourceNodeName, uint32 sourceNodePort, + char *targetNodeName, uint32 targetNodePort); static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList, char *sourceNodeName, uint32 sourceNodePort, - char *targetNodeName, uint32 - targetNodePort); + char *targetNodeName, uint32 targetNodePort, + ShardTransferType transferType); +static bool TransferAlreadyCompleted(List *colocatedShardList, + char *sourceNodeName, uint32 sourceNodePort, + char *targetNodeName, uint32 targetNodePort, + ShardTransferType transferType); +static void LockColocatedRelationsForMove(List *colocatedTableList); +static void ErrorIfForeignTableForShardTransfer(List *colocatedTableList, + ShardTransferType transferType); static List * RecreateShardDDLCommandList(ShardInterval *shardInterval, const char *sourceNodeName, int32 sourceNodePort); @@ -163,9 +200,9 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS) char shardReplicationMode = 
LookupShardTransferMode(shardReplicationModeOid); - ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort, - shardReplicationMode); + TransferShards(shardId, sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + shardReplicationMode, SHARD_TRANSFER_COPY); PG_RETURN_VOID(); } @@ -192,10 +229,9 @@ citus_copy_shard_placement_with_nodeid(PG_FUNCTION_ARGS) char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); - ReplicateColocatedShardPlacement(shardId, - sourceNode->workerName, sourceNode->workerPort, - targetNode->workerName, targetNode->workerPort, - shardReplicationMode); + TransferShards(shardId, sourceNode->workerName, sourceNode->workerPort, + targetNode->workerName, targetNode->workerPort, + shardReplicationMode, SHARD_TRANSFER_COPY); PG_RETURN_VOID(); } @@ -228,9 +264,9 @@ master_copy_shard_placement(PG_FUNCTION_ARGS) ereport(WARNING, (errmsg("do_repair argument is deprecated"))); } - ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort, - shardReplicationMode); + TransferShards(shardId, sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + shardReplicationMode, SHARD_TRANSFER_COPY); PG_RETURN_VOID(); @@ -264,9 +300,10 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) int32 targetNodePort = PG_GETARG_INT32(4); Oid shardReplicationModeOid = PG_GETARG_OID(5); - citus_move_shard_placement_internal(shardId, sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort, - shardReplicationModeOid); + char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); + TransferShards(shardId, sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + shardReplicationMode, SHARD_TRANSFER_MOVE); PG_RETURN_VOID(); } @@ -291,126 +328,111 @@ citus_move_shard_placement_with_nodeid(PG_FUNCTION_ARGS) WorkerNode *sourceNode = FindNodeWithNodeId(sourceNodeId, missingOk); WorkerNode *targetNode = FindNodeWithNodeId(targetNodeId, missingOk); - citus_move_shard_placement_internal(shardId, sourceNode->workerName, - sourceNode->workerPort, targetNode->workerName, - targetNode->workerPort, - shardReplicationModeOid); + char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); + TransferShards(shardId, sourceNode->workerName, + sourceNode->workerPort, targetNode->workerName, + targetNode->workerPort, shardReplicationMode, SHARD_TRANSFER_MOVE); PG_RETURN_VOID(); } /* - * citus_move_shard_placement_internal is the internal function for shard moves. + * TransferShards is the function for shard transfers. 
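The copy and move UDFs above now funnel into a single TransferShards entry point, and user-facing wording is picked out of the ShardTransferTypeNames* arrays indexed by the transfer type. A short standalone sketch of that designated-initializer pattern follows; the enum values and function names are illustrative, not the Citus definitions.

#include <stdio.h>

typedef enum
{
    SHARD_TRANSFER_INVALID_FIRST = 0,
    SHARD_TRANSFER_MOVE = 1,
    SHARD_TRANSFER_COPY = 2
} ShardTransferType;

/* designated initializers keep each string aligned with its enum value */
static const char *TransferTypeNames[] = {
    [SHARD_TRANSFER_INVALID_FIRST] = "unknown",
    [SHARD_TRANSFER_MOVE] = "move",
    [SHARD_TRANSFER_COPY] = "copy",
};

static const char *TransferTypeNamesContinuous[] = {
    [SHARD_TRANSFER_INVALID_FIRST] = "unknown",
    [SHARD_TRANSFER_MOVE] = "Moving",
    [SHARD_TRANSFER_COPY] = "Copying",
};

/* one shared code path; only the wording changes per transfer type */
static void ReportUnsafeTargetNode(ShardTransferType transferType)
{
    printf("%s shards to a non-existing node is not supported\n",
           TransferTypeNamesContinuous[transferType]);
}

int main(void)
{
    ReportUnsafeTargetNode(SHARD_TRANSFER_MOVE);
    ReportUnsafeTargetNode(SHARD_TRANSFER_COPY);
    printf("operation name: %s\n", TransferTypeNames[SHARD_TRANSFER_COPY]);
    return 0;
}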
*/ void -citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName, - int32 sourceNodePort, char *targetNodeName, - int32 targetNodePort, Oid shardReplicationModeOid) +TransferShards(int64 shardId, char *sourceNodeName, + int32 sourceNodePort, char *targetNodeName, + int32 targetNodePort, char shardReplicationMode, + ShardTransferType transferType) { - ListCell *colocatedTableCell = NULL; - ListCell *colocatedShardCell = NULL; + /* strings to be used in log messages */ + const char *operationName = ShardTransferTypeNames[transferType]; + const char *operationNameCapitalized = + ShardTransferTypeNamesCapitalized[transferType]; + const char *operationFunctionName = ShardTransferTypeFunctionNames[transferType]; + /* cannot transfer shard to the same node */ ErrorIfSameNode(sourceNodeName, sourceNodePort, targetNodeName, targetNodePort, - "move"); - - Oid relationId = RelationIdForShard(shardId); - ErrorIfMoveUnsupportedTableType(relationId); - ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort); - - AcquirePlacementColocationLock(relationId, ExclusiveLock, "move"); + operationName); ShardInterval *shardInterval = LoadShardInterval(shardId); Oid distributedTableId = shardInterval->relationId; + /* error if unsupported shard transfer */ + if (transferType == SHARD_TRANSFER_MOVE) + { + ErrorIfMoveUnsupportedTableType(distributedTableId); + } + else if (transferType == SHARD_TRANSFER_COPY) + { + ErrorIfTableCannotBeReplicated(distributedTableId); + EnsureNoModificationsHaveBeenDone(); + } + + ErrorIfTargetNodeIsNotSafeForTransfer(targetNodeName, targetNodePort, transferType); + + AcquirePlacementColocationLock(distributedTableId, ExclusiveLock, operationName); + List *colocatedTableList = ColocatedTableList(distributedTableId); List *colocatedShardList = ColocatedShardIntervalList(shardInterval); - foreach(colocatedTableCell, colocatedTableList) + EnsureTableListOwner(colocatedTableList); + + if (transferType == SHARD_TRANSFER_MOVE) { - Oid colocatedTableId = lfirst_oid(colocatedTableCell); - - /* check that user has owner rights in all co-located tables */ - EnsureTableOwner(colocatedTableId); - /* * Block concurrent DDL / TRUNCATE commands on the relation. Similarly, * block concurrent citus_move_shard_placement() on any shard of * the same relation. This is OK for now since we're executing shard * moves sequentially anyway. */ - LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock); - - if (IsForeignTable(relationId)) - { - char *relationName = get_rel_name(colocatedTableId); - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot move shard"), - errdetail("Table %s is a foreign table. Moving " - "shards backed by foreign tables is " - "not supported.", relationName))); - } + LockColocatedRelationsForMove(colocatedTableList); } - /* we sort colocatedShardList so that lock operations will not cause any deadlocks */ - colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById); + ErrorIfForeignTableForShardTransfer(colocatedTableList, transferType); + + if (transferType == SHARD_TRANSFER_COPY) + { + ErrorIfReplicatingDistributedTableWithFKeys(colocatedTableList); + } /* - * If there are no active placements on the source and only active placements on - * the target node, we assume the copy to already be done. + * We sort shardIntervalList so that lock operations will not cause any + * deadlocks. 
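The colocated shard list is sorted by shard id before any locks are taken so that concurrent transfers always acquire locks in the same order, which is what rules out lock-ordering deadlocks. A minimal sketch of that idea using qsort; this stands in for SortList/CompareShardIntervalsById and the real lock calls, it is not the Citus code.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static int CompareShardIds(const void *a, const void *b)
{
    int64_t left = *(const int64_t *) a;
    int64_t right = *(const int64_t *) b;
    return (left > right) - (left < right);
}

static void LockShard(int64_t shardId)
{
    /* stand-in for acquiring the real per-shard lock */
    printf("locking shard %lld\n", (long long) shardId);
}

int main(void)
{
    /* two backends seeing the same shards in different input orders */
    int64_t backendA[] = { 102008, 102004, 102006 };
    int64_t backendB[] = { 102006, 102008, 102004 };

    /* after sorting, both lock 102004, then 102006, then 102008 */
    qsort(backendA, 3, sizeof(int64_t), CompareShardIds);
    qsort(backendB, 3, sizeof(int64_t), CompareShardIds);

    for (int i = 0; i < 3; i++)
        LockShard(backendA[i]);
    for (int i = 0; i < 3; i++)
        LockShard(backendB[i]);

    return 0;
}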
*/ - if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) && - !IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort)) + colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById); + + if (TransferAlreadyCompleted(colocatedShardList, + sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + transferType)) { + /* if the transfer is already completed, we can return right away */ ereport(WARNING, (errmsg("shard is already present on node %s:%d", targetNodeName, targetNodePort), - errdetail("Move may have already completed."))); + errdetail("%s may have already completed.", + operationNameCapitalized))); return; } - foreach(colocatedShardCell, colocatedShardList) - { - ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell); - uint64 colocatedShardId = colocatedShard->shardId; - - EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort, + EnsureAllShardsCanBeCopied(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName, targetNodePort); - } - char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); if (shardReplicationMode == TRANSFER_MODE_AUTOMATIC) { VerifyTablesHaveReplicaIdentity(colocatedTableList); } - EnsureEnoughDiskSpaceForShardMove(colocatedShardList, sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort); + EnsureEnoughDiskSpaceForShardMove(colocatedShardList, + sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, transferType); - - /* - * We want to be able to track progress of shard moves using - * get_rebalancer_progress. If this move is initiated by the rebalancer, - * then the rebalancer call has already set up the shared memory that is - * used to do that. But if citus_move_shard_placement is called directly by - * the user (or through any other mechanism), then the shared memory is not - * set up yet. In that case we do it here. - */ - if (!IsRebalancerInternalBackend()) - { - WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort); - WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort); - - PlacementUpdateEvent *placementUpdateEvent = palloc0( - sizeof(PlacementUpdateEvent)); - placementUpdateEvent->updateType = PLACEMENT_UPDATE_MOVE; - placementUpdateEvent->shardId = shardId; - placementUpdateEvent->sourceNode = sourceNode; - placementUpdateEvent->targetNode = targetNode; - SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId, - REBALANCE_PROGRESS_MOVING, - PLACEMENT_UPDATE_STATUS_SETTING_UP); - } + SetupRebalanceMonitorForShardTransfer(shardId, distributedTableId, + sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + transferType); UpdatePlacementUpdateStatusForShardIntervalList( colocatedShardList, @@ -428,7 +450,7 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName, { BlockWritesToShardList(colocatedShardList); } - else + else if (transferType == SHARD_TRANSFER_MOVE) { /* * We prevent multiple shard moves in a transaction that use logical @@ -452,6 +474,20 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName, PlacementMovedUsingLogicalReplicationInTX = true; } + if (transferType == SHARD_TRANSFER_COPY && + !IsCitusTableType(distributedTableId, REFERENCE_TABLE)) + { + /* + * When copying a shard to a new node, we should first ensure that reference + * tables are present such that joins work immediately after copying the shard. + * When copying a reference table, we are probably trying to achieve just that. 
+ * + * Since this a long-running operation we do this after the error checks, but + * before taking metadata locks. + */ + EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode); + } + DropOrphanedResourcesInSeparateTransaction(); ShardInterval *colocatedShard = NULL; @@ -466,18 +502,21 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName, ErrorIfCleanupRecordForShardExists(qualifiedShardName); } - /* - * CopyColocatedShardPlacement function copies given shard with its co-located - * shards. - */ CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName, - targetNodePort, useLogicalReplication, "citus_move_shard_placement"); + targetNodePort, useLogicalReplication, operationFunctionName); - /* delete old shards metadata and mark the shards as to be deferred drop */ - int32 sourceGroupId = GroupForNode(sourceNodeName, sourceNodePort); - InsertCleanupRecordsForShardPlacementsOnNode(colocatedShardList, - sourceGroupId); + if (transferType == SHARD_TRANSFER_MOVE) + { + /* delete old shards metadata and mark the shards as to be deferred drop */ + int32 sourceGroupId = GroupForNode(sourceNodeName, sourceNodePort); + InsertCleanupRecordsForShardPlacementsOnNode(colocatedShardList, + sourceGroupId); + } + /* + * Finally insert the placements to pg_dist_placement and sync it to the + * metadata workers. + */ colocatedShard = NULL; foreach_ptr(colocatedShard, colocatedShardList) { @@ -488,17 +527,30 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName, InsertShardPlacementRow(colocatedShardId, placementId, ShardLength(colocatedShardId), groupId); + + if (transferType == SHARD_TRANSFER_COPY && + ShouldSyncTableMetadata(colocatedShard->relationId)) + { + char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId, + 0, groupId); + + SendCommandToWorkersWithMetadata(placementCommand); + } } - /* - * Since this is move operation, we remove the placements from the metadata - * for the source node after copy. - */ - DropShardPlacementsFromMetadata(colocatedShardList, sourceNodeName, sourceNodePort); + if (transferType == SHARD_TRANSFER_MOVE) + { + /* + * Since this is move operation, we remove the placements from the metadata + * for the source node after copy. + */ + DropShardPlacementsFromMetadata(colocatedShardList, + sourceNodeName, sourceNodePort); - UpdateColocatedShardPlacementMetadataOnWorkers(shardId, sourceNodeName, - sourceNodePort, targetNodeName, - targetNodePort); + UpdateColocatedShardPlacementMetadataOnWorkers(shardId, sourceNodeName, + sourceNodePort, targetNodeName, + targetNodePort); + } UpdatePlacementUpdateStatusForShardIntervalList( colocatedShardList, @@ -611,6 +663,70 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetN } +/* + * LockColocatedRelationsForMove takes a list of relations, locks all of them + * using ShareUpdateExclusiveLock + */ +static void +LockColocatedRelationsForMove(List *colocatedTableList) +{ + Oid colocatedTableId = InvalidOid; + foreach_oid(colocatedTableId, colocatedTableList) + { + LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock); + } +} + + +/* + * ErrorIfForeignTableForShardTransfer takes a list of relations, errors out if + * there's a foreign table in the list. 
+ */ +static void +ErrorIfForeignTableForShardTransfer(List *colocatedTableList, + ShardTransferType transferType) +{ + Oid colocatedTableId = InvalidOid; + foreach_oid(colocatedTableId, colocatedTableList) + { + if (IsForeignTable(colocatedTableId)) + { + char *relationName = get_rel_name(colocatedTableId); + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot %s shard", + ShardTransferTypeNames[transferType]), + errdetail("Table %s is a foreign table. " + "%s shards backed by foreign tables is " + "not supported.", relationName, + ShardTransferTypeNamesContinuous[transferType]))); + } + } +} + + +/* + * EnsureAllShardsCanBeCopied is a wrapper around EnsureShardCanBeCopied. + */ +static void +EnsureAllShardsCanBeCopied(List *colocatedShardList, + char *sourceNodeName, uint32 sourceNodePort, + char *targetNodeName, uint32 targetNodePort) +{ + ShardInterval *colocatedShard = NULL; + foreach_ptr(colocatedShard, colocatedShardList) + { + uint64 colocatedShardId = colocatedShard->shardId; + + /* + * To transfer shard, there should be healthy placement in source node and no + * placement in the target node. + */ + EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort); + } +} + + /* * EnsureEnoughDiskSpaceForShardMove checks that there is enough space for * shard moves of the given colocated shard list from source node to target node. @@ -619,9 +735,10 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetN static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList, char *sourceNodeName, uint32 sourceNodePort, - char *targetNodeName, uint32 targetNodePort) + char *targetNodeName, uint32 targetNodePort, + ShardTransferType transferType) { - if (!CheckAvailableSpaceBeforeMove) + if (!CheckAvailableSpaceBeforeMove || transferType != SHARD_TRANSFER_MOVE) { return; } @@ -636,6 +753,34 @@ EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList, } +/* + * TransferAlreadyCompleted returns true if the given shard transfer is already done. + * Returns false otherwise. + */ +static bool +TransferAlreadyCompleted(List *colocatedShardList, + char *sourceNodeName, uint32 sourceNodePort, + char *targetNodeName, uint32 targetNodePort, + ShardTransferType transferType) +{ + if (transferType == SHARD_TRANSFER_MOVE && + IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) && + !IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort)) + { + return true; + } + + if (transferType == SHARD_TRANSFER_COPY && + IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) && + IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort)) + { + return true; + } + + return false; +} + + /* * ShardListSizeInBytes returns the size in bytes of a set of shard tables. */ @@ -682,6 +827,49 @@ ShardListSizeInBytes(List *shardList, char *workerNodeName, uint32 } +/* + * SetupRebalanceMonitorForShardTransfer prepares the parameters and + * calls SetupRebalanceMonitor, unless the current transfer is a move + * initiated by the rebalancer. 
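TransferAlreadyCompleted, introduced above, treats a transfer as a no-op when the placements already look like the desired end state: for a move, the shards are on the target and no longer on the source; for a copy, they are on both nodes. A small sketch of that decision, with the two IsShardListOnNode probes replaced by plain booleans and a hypothetical helper name:

#include <stdbool.h>
#include <stdio.h>

typedef enum { SHARD_TRANSFER_MOVE, SHARD_TRANSFER_COPY } ShardTransferType;

/* onSource/onTarget stand in for IsShardListOnNode() on each node */
static bool TransferLooksCompleted(ShardTransferType type, bool onSource, bool onTarget)
{
    if (type == SHARD_TRANSFER_MOVE)
        return onTarget && !onSource;   /* shards already moved away from the source */
    return onTarget && onSource;        /* copy: both nodes already hold the shards */
}

int main(void)
{
    printf("move, source+target: %d\n",
           TransferLooksCompleted(SHARD_TRANSFER_MOVE, true, true));   /* 0: not done */
    printf("move, target only:   %d\n",
           TransferLooksCompleted(SHARD_TRANSFER_MOVE, false, true));  /* 1: done */
    printf("copy, source+target: %d\n",
           TransferLooksCompleted(SHARD_TRANSFER_COPY, true, true));   /* 1: done */
    return 0;
}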
+ * See comments on SetupRebalanceMonitor + */ +static void +SetupRebalanceMonitorForShardTransfer(uint64 shardId, Oid distributedTableId, + char *sourceNodeName, uint32 sourceNodePort, + char *targetNodeName, uint32 targetNodePort, + ShardTransferType transferType) +{ + if (transferType == SHARD_TRANSFER_MOVE && IsRebalancerInternalBackend()) + { + /* + * We want to be able to track progress of shard moves using + * get_rebalancer_progress. If this move is initiated by the rebalancer, + * then the rebalancer call has already set up the shared memory that is + * used to do that, so we should return here. + * But if citus_move_shard_placement is called directly by the user + * (or through any other mechanism), then the shared memory is not + * set up yet. In that case we do it here. + */ + return; + } + + WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort); + WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort); + + PlacementUpdateEvent *placementUpdateEvent = palloc0( + sizeof(PlacementUpdateEvent)); + placementUpdateEvent->updateType = + transferType == SHARD_TRANSFER_COPY ? PLACEMENT_UPDATE_COPY : + PLACEMENT_UPDATE_MOVE; + placementUpdateEvent->shardId = shardId; + placementUpdateEvent->sourceNode = sourceNode; + placementUpdateEvent->targetNode = targetNode; + SetupRebalanceMonitor(list_make1(placementUpdateEvent), distributedTableId, + REBALANCE_PROGRESS_MOVING, + PLACEMENT_UPDATE_STATUS_SETTING_UP); +} + + /* * CheckSpaceConstraints checks there is enough space to place the colocation * on the node that the connection is connected to. @@ -729,17 +917,19 @@ CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes) /* - * ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not - * eligible for moving shards. + * ErrorIfTargetNodeIsNotSafeForTransfer throws error if the target node is not + * eligible for shard transfers. 
*/ -void -ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort) +static void +ErrorIfTargetNodeIsNotSafeForTransfer(const char *targetNodeName, int targetNodePort, + ShardTransferType transferType) { WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort); if (workerNode == NULL) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Moving shards to a non-existing node is not supported"), + errmsg("%s shards to a non-existing node is not supported", + ShardTransferTypeNamesContinuous[transferType]), errhint( "Add the target node via SELECT citus_add_node('%s', %d);", targetNodeName, targetNodePort))); @@ -748,13 +938,14 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort) if (!workerNode->isActive) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Moving shards to a non-active node is not supported"), + errmsg("%s shards to a non-active node is not supported", + ShardTransferTypeNamesContinuous[transferType]), errhint( "Activate the target node via SELECT citus_activate_node('%s', %d);", targetNodeName, targetNodePort))); } - if (!workerNode->shouldHaveShards) + if (transferType == SHARD_TRANSFER_MOVE && !workerNode->shouldHaveShards) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Moving shards to a node that shouldn't have a shard is " @@ -767,8 +958,9 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort) if (!NodeIsPrimary(workerNode)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Moving shards to a secondary (e.g., replica) node is " - "not supported"))); + errmsg("%s shards to a secondary (e.g., replica) node is " + "not supported", + ShardTransferTypeNamesContinuous[transferType]))); } } @@ -1046,41 +1238,6 @@ ErrorIfTableCannotBeReplicated(Oid relationId) } -/* - * ErrorIfTargetNodeIsNotSafeToCopyTo throws an error if the target node is not - * eligible for copying shards. - */ -static void -ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName, int targetNodePort) -{ - WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort); - if (workerNode == NULL) - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Copying shards to a non-existing node is not supported"), - errhint( - "Add the target node via SELECT citus_add_node('%s', %d);", - targetNodeName, targetNodePort))); - } - - if (!workerNode->isActive) - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Copying shards to a non-active node is not supported"), - errhint( - "Activate the target node via SELECT citus_activate_node('%s', %d);", - targetNodeName, targetNodePort))); - } - - if (!NodeIsPrimary(workerNode)) - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Copying shards to a secondary (e.g., replica) node is " - "not supported"))); - } -} - - /* * LookupShardTransferMode maps the oids of citus.shard_transfer_mode enum * values to a char. @@ -1114,154 +1271,6 @@ LookupShardTransferMode(Oid shardReplicationModeOid) } -/* - * ReplicateColocatedShardPlacement replicates the given shard and its - * colocated shards from a source node to target node. 
- */ -static void -ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, - int32 sourceNodePort, char *targetNodeName, - int32 targetNodePort, char shardReplicationMode) -{ - ShardInterval *shardInterval = LoadShardInterval(shardId); - Oid distributedTableId = shardInterval->relationId; - - ErrorIfSameNode(sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort, - "copy"); - - ErrorIfTableCannotBeReplicated(shardInterval->relationId); - ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort); - EnsureNoModificationsHaveBeenDone(); - - AcquirePlacementColocationLock(shardInterval->relationId, ExclusiveLock, "copy"); - - List *colocatedTableList = ColocatedTableList(distributedTableId); - List *colocatedShardList = ColocatedShardIntervalList(shardInterval); - - EnsureTableListOwner(colocatedTableList); - EnsureTableListSuitableForReplication(colocatedTableList); - - /* - * We sort shardIntervalList so that lock operations will not cause any - * deadlocks. - */ - colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById); - - /* - * If there are active placements on both nodes, we assume the copy to already - * be done. - */ - if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) && - IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort)) - { - ereport(WARNING, (errmsg("shard is already present on node %s:%d", - targetNodeName, targetNodePort), - errdetail("Copy may have already completed."))); - return; - } - - WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort); - WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort); - - Oid relationId = RelationIdForShard(shardId); - PlacementUpdateEvent *placementUpdateEvent = palloc0( - sizeof(PlacementUpdateEvent)); - placementUpdateEvent->updateType = PLACEMENT_UPDATE_COPY; - placementUpdateEvent->shardId = shardId; - placementUpdateEvent->sourceNode = sourceNode; - placementUpdateEvent->targetNode = targetNode; - SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId, - REBALANCE_PROGRESS_MOVING, - PLACEMENT_UPDATE_STATUS_SETTING_UP); - - UpdatePlacementUpdateStatusForShardIntervalList( - colocatedShardList, - sourceNodeName, - sourceNodePort, - PLACEMENT_UPDATE_STATUS_SETTING_UP); - - /* - * At this point of the shard replication, we don't need to block the writes to - * shards when logical replication is used. - */ - bool useLogicalReplication = CanUseLogicalReplication(distributedTableId, - shardReplicationMode); - if (!useLogicalReplication) - { - BlockWritesToShardList(colocatedShardList); - } - - ShardInterval *colocatedShard = NULL; - foreach_ptr(colocatedShard, colocatedShardList) - { - uint64 colocatedShardId = colocatedShard->shardId; - - /* - * For shard copy, there should be healthy placement in source node and no - * placement in the target node. - */ - EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort); - } - - if (shardReplicationMode == TRANSFER_MODE_AUTOMATIC) - { - VerifyTablesHaveReplicaIdentity(colocatedTableList); - } - - if (!IsCitusTableType(distributedTableId, REFERENCE_TABLE)) - { - /* - * When copying a shard to a new node, we should first ensure that reference - * tables are present such that joins work immediately after copying the shard. - * When copying a reference table, we are probably trying to achieve just that. 
- * - * Since this a long-running operation we do this after the error checks, but - * before taking metadata locks. - */ - EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode); - } - - DropOrphanedResourcesInSeparateTransaction(); - - CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort, useLogicalReplication, - "citus_copy_shard_placement"); - - /* - * Finally insert the placements to pg_dist_placement and sync it to the - * metadata workers. - */ - foreach_ptr(colocatedShard, colocatedShardList) - { - uint64 colocatedShardId = colocatedShard->shardId; - uint32 groupId = GroupForNode(targetNodeName, targetNodePort); - uint64 placementId = GetNextPlacementId(); - - InsertShardPlacementRow(colocatedShardId, placementId, - ShardLength(colocatedShardId), - groupId); - - if (ShouldSyncTableMetadata(colocatedShard->relationId)) - { - char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId, - 0, groupId); - - SendCommandToWorkersWithMetadata(placementCommand); - } - } - - UpdatePlacementUpdateStatusForShardIntervalList( - colocatedShardList, - sourceNodeName, - sourceNodePort, - PLACEMENT_UPDATE_STATUS_COMPLETED); - - FinalizeCurrentProgressMonitor(); -} - - /* * EnsureTableListOwner ensures current user owns given tables. Superusers * are regarded as owners. @@ -1278,25 +1287,15 @@ EnsureTableListOwner(List *tableIdList) /* - * EnsureTableListSuitableForReplication errors out if given tables are not + * ErrorIfReplicatingDistributedTableWithFKeys errors out if given tables are not * suitable for replication. */ static void -EnsureTableListSuitableForReplication(List *tableIdList) +ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList) { Oid tableId = InvalidOid; foreach_oid(tableId, tableIdList) { - if (IsForeignTable(tableId)) - { - char *relationName = get_rel_name(tableId); - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot replicate shard"), - errdetail("Table %s is a foreign table. Replicating " - "shards backed by foreign tables is " - "not supported.", relationName))); - } - List *foreignConstraintCommandList = GetReferencingForeignConstaintCommands(tableId); @@ -1318,7 +1317,7 @@ EnsureTableListSuitableForReplication(List *tableIdList) static void CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort, char *targetNodeName, int32 targetNodePort, bool useLogicalReplication, - char *operationName) + const char *operationName) { if (list_length(shardIntervalList) < 1) { diff --git a/src/backend/distributed/operations/worker_copy_table_to_node_udf.c b/src/backend/distributed/operations/worker_copy_table_to_node_udf.c index 7af80ef55..f0f83744d 100644 --- a/src/backend/distributed/operations/worker_copy_table_to_node_udf.c +++ b/src/backend/distributed/operations/worker_copy_table_to_node_udf.c @@ -53,8 +53,14 @@ worker_copy_table_to_node(PG_FUNCTION_ARGS) targetNodeId); StringInfo selectShardQueryForCopy = makeStringInfo(); + + /* + * Even though we do COPY(SELECT ...) all the columns, we can't just do SELECT * because we need to not COPY generated colums. 
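The reason SELECT * is replaced here is that COPY cannot load into generated columns, and dropped columns must be skipped as well, so both the SELECT and the COPY target now carry an explicit column list. A standalone sketch of that filtering follows, assuming plain structs instead of the Postgres TupleDesc/Form_pg_attribute types and omitting identifier quoting for brevity.

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

typedef struct
{
    const char *name;
    bool isGenerated;   /* attgenerated in the real catalog */
    bool isDropped;     /* attisdropped in the real catalog */
} ColumnDesc;

/* builds "a,b,..." skipping generated and dropped columns */
static void BuildCopyableColumnList(const ColumnDesc *columns, int count,
                                    char *out, size_t outSize)
{
    out[0] = '\0';
    bool first = true;
    for (int i = 0; i < count; i++)
    {
        if (columns[i].isGenerated || columns[i].isDropped)
            continue;
        if (!first)
            strncat(out, ",", outSize - strlen(out) - 1);
        strncat(out, columns[i].name, outSize - strlen(out) - 1);
        first = false;
    }
}

int main(void)
{
    ColumnDesc columns[] = {
        { "id", false, false },
        { "doubled", true, false },                      /* GENERATED ALWAYS AS (...) */
        { "........pg.dropped.3........", false, true }, /* placeholder for a dropped column */
        { "payload", false, false },
    };

    char columnList[256];
    BuildCopyableColumnList(columns, 4, columnList, sizeof(columnList));

    /* the same list is interpolated into both statements */
    printf("SELECT %s FROM source_shard;\n", columnList);
    printf("COPY target_shard (%s) FROM STDIN;\n", columnList);
    return 0;
}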
+ */ + const char *columnList = CopyableColumnNamesFromRelationName(relationSchemaName, + relationName); appendStringInfo(selectShardQueryForCopy, - "SELECT * FROM %s;", relationQualifiedName); + "SELECT %s FROM %s;", columnList, relationQualifiedName); ParamListInfo params = NULL; ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params, diff --git a/src/backend/distributed/operations/worker_shard_copy.c b/src/backend/distributed/operations/worker_shard_copy.c index 9239caffb..00a5413c9 100644 --- a/src/backend/distributed/operations/worker_shard_copy.c +++ b/src/backend/distributed/operations/worker_shard_copy.c @@ -24,6 +24,7 @@ #include "distributed/relation_utils.h" #include "distributed/version_compat.h" #include "distributed/local_executor.h" +#include "distributed/replication_origin_session_utils.h" /* * LocalCopyBuffer is used in copy callback to return the copied rows. @@ -73,13 +74,14 @@ static void ShardCopyDestReceiverDestroy(DestReceiver *destReceiver); static bool CanUseLocalCopy(uint32_t destinationNodeId); static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName, bool - useBinaryFormat); + useBinaryFormat, TupleDesc tupleDesc); static void WriteLocalTuple(TupleTableSlot *slot, ShardCopyDestReceiver *copyDest); static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead); static void LocalCopyToShard(ShardCopyDestReceiver *copyDest, CopyOutState localCopyOutState); static void ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest); + static bool CanUseLocalCopy(uint32_t destinationNodeId) { @@ -103,9 +105,16 @@ ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest) NULL /* database (current) */); ClaimConnectionExclusively(copyDest->connection); + + RemoteTransactionBeginIfNecessary(copyDest->connection); + + SetupReplicationOriginRemoteSession(copyDest->connection); + + StringInfo copyStatement = ConstructShardCopyStatement( copyDest->destinationShardFullyQualifiedName, - copyDest->copyOutState->binary); + copyDest->copyOutState->binary, + copyDest->tupleDescriptor); if (!SendRemoteCommand(copyDest->connection, copyStatement->data)) { @@ -184,6 +193,8 @@ ShardCopyDestReceiverReceive(TupleTableSlot *slot, DestReceiver *dest) CopyOutState copyOutState = copyDest->copyOutState; if (copyDest->useLocalCopy) { + /* Setup replication origin session for local copy*/ + WriteLocalTuple(slot, copyDest); if (copyOutState->fe_msgbuf->len > LocalCopyFlushThresholdByte) { @@ -259,6 +270,11 @@ ShardCopyDestReceiverStartup(DestReceiver *dest, int operation, TupleDesc copyDest->columnOutputFunctions = ColumnOutputFunctions(inputTupleDescriptor, copyOutState->binary); copyDest->copyOutState = copyOutState; + if (copyDest->useLocalCopy) + { + /* Setup replication origin session for local copy*/ + SetupReplicationOriginLocalSession(); + } } @@ -317,6 +333,9 @@ ShardCopyDestReceiverShutdown(DestReceiver *dest) PQclear(result); ForgetResults(copyDest->connection); + + ResetReplicationOriginRemoteSession(copyDest->connection); + CloseConnection(copyDest->connection); } } @@ -329,6 +348,10 @@ static void ShardCopyDestReceiverDestroy(DestReceiver *dest) { ShardCopyDestReceiver *copyDest = (ShardCopyDestReceiver *) dest; + if (copyDest->useLocalCopy) + { + ResetReplicationOriginLocalSession(); + } if (copyDest->copyOutState) { @@ -344,21 +367,80 @@ ShardCopyDestReceiverDestroy(DestReceiver *dest) } +/* + * CopyableColumnNamesFromTupleDesc function creates and returns a comma seperated column names string to be used in COPY + 
* and SELECT statements when copying a table. The COPY and SELECT statements should filter out the GENERATED columns since COPY + * statement fails to handle them. Iterating over the attributes of the table we also need to skip the dropped columns. + */ +const char * +CopyableColumnNamesFromTupleDesc(TupleDesc tupDesc) +{ + StringInfo columnList = makeStringInfo(); + bool firstInList = true; + + for (int i = 0; i < tupDesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupDesc, i); + if (att->attgenerated || att->attisdropped) + { + continue; + } + if (!firstInList) + { + appendStringInfo(columnList, ","); + } + + firstInList = false; + + appendStringInfo(columnList, "%s", quote_identifier(NameStr(att->attname))); + } + + return columnList->data; +} + + +/* + * CopyableColumnNamesFromRelationName function is a wrapper for CopyableColumnNamesFromTupleDesc. + */ +const char * +CopyableColumnNamesFromRelationName(const char *schemaName, const char *relationName) +{ + Oid namespaceOid = get_namespace_oid(schemaName, true); + + Oid relationId = get_relname_relid(relationName, namespaceOid); + + Relation relation = relation_open(relationId, AccessShareLock); + + TupleDesc tupleDesc = RelationGetDescr(relation); + + const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc); + + relation_close(relation, NoLock); + + return columnList; +} + + /* * ConstructShardCopyStatement constructs the text of a COPY statement * for copying into a result table */ static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName, bool - useBinaryFormat) + useBinaryFormat, + TupleDesc tupleDesc) { char *destinationShardSchemaName = linitial(destinationShardFullyQualifiedName); char *destinationShardRelationName = lsecond(destinationShardFullyQualifiedName); + StringInfo command = makeStringInfo(); - appendStringInfo(command, "COPY %s.%s FROM STDIN", + + const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc); + + appendStringInfo(command, "COPY %s.%s (%s) FROM STDIN", quote_identifier(destinationShardSchemaName), quote_identifier( - destinationShardRelationName)); + destinationShardRelationName), columnList); if (useBinaryFormat) { diff --git a/src/backend/distributed/operations/worker_split_copy_udf.c b/src/backend/distributed/operations/worker_split_copy_udf.c index b96475992..c154ac040 100644 --- a/src/backend/distributed/operations/worker_split_copy_udf.c +++ b/src/backend/distributed/operations/worker_split_copy_udf.c @@ -110,8 +110,13 @@ worker_split_copy(PG_FUNCTION_ARGS) splitCopyInfoList)))); StringInfo selectShardQueryForCopy = makeStringInfo(); + const char *columnList = CopyableColumnNamesFromRelationName( + sourceShardToCopySchemaName, + sourceShardToCopyName); + appendStringInfo(selectShardQueryForCopy, - "SELECT * FROM %s;", sourceShardToCopyQualifiedName); + "SELECT %s FROM %s;", columnList, + sourceShardToCopyQualifiedName); ParamListInfo params = NULL; ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params, diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 701ae4ff5..1fcc45585 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -34,6 +34,7 @@ #include "distributed/intermediate_results.h" #include "distributed/listutils.h" #include "distributed/coordinator_protocol.h" +#include "distributed/merge_planner.h" #include "distributed/metadata_cache.h" #include 
"distributed/multi_executor.h" #include "distributed/distributed_planner.h" @@ -68,6 +69,17 @@ #include "utils/syscache.h" +/* RouterPlanType is used to determine the router plan to invoke */ +typedef enum RouterPlanType +{ + INSERT_SELECT_INTO_CITUS_TABLE, + INSERT_SELECT_INTO_LOCAL_TABLE, + DML_QUERY, + SELECT_QUERY, + MERGE_QUERY, + REPLAN_WITH_BOUND_PARAMETERS +} RouterPlanType; + static List *plannerRestrictionContextList = NIL; int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */ static uint64 NextPlanId = 1; @@ -75,12 +87,8 @@ static uint64 NextPlanId = 1; /* keep track of planner call stack levels */ int PlannerLevel = 0; -static void ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, - List *rangeTableList); -static bool ContainsMergeCommandWalker(Node *node); static bool ListContainsDistributedTableRTE(List *rangeTableList, bool *maybeHasForeignDistributedTable); -static bool IsUpdateOrDelete(Query *query); static PlannedStmt * CreateDistributedPlannedStmt( DistributedPlanningContext *planContext); static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, @@ -132,7 +140,10 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext static RTEListProperties * GetRTEListProperties(List *rangeTableList); static List * TranslatedVars(PlannerInfo *root, int relationIndex); static void WarnIfListHasForeignDistributedTable(List *rangeTableList); -static void ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList); +static RouterPlanType GetRouterPlanType(Query *query, + Query *originalQuery, + bool hasUnresolvedParams); + /* Distributed planner hook */ PlannedStmt * @@ -156,7 +167,7 @@ distributed_planner(Query *parse, * We cannot have merge command for this path as well because * there cannot be recursively planned merge command. */ - Assert(!ContainsMergeCommandWalker((Node *) parse)); + Assert(!IsMergeQuery(parse)); needsDistributedPlanning = true; } @@ -200,12 +211,6 @@ distributed_planner(Query *parse, if (!fastPathRouterQuery) { - /* - * Fast path queries cannot have merge command, and we - * prevent the remaining here. - */ - ErrorIfQueryHasUnsupportedMergeCommand(parse, rangeTableList); - /* * When there are partitioned tables (not applicable to fast path), * pretend that they are regular tables to avoid unnecessary work @@ -304,72 +309,6 @@ distributed_planner(Query *parse, } -/* - * ErrorIfQueryHasUnsupportedMergeCommand walks over the query tree and bails out - * if there is no Merge command (e.g., CMD_MERGE) in the query tree. For merge, - * looks for all supported combinations, throws an exception if any violations - * are seen. - */ -static void -ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, List *rangeTableList) -{ - /* - * Postgres currently doesn't support Merge queries inside subqueries and - * ctes, but lets be defensive and do query tree walk anyway. - * - * We do not call this path for fast-path queries to avoid this additional - * overhead. - */ - if (!ContainsMergeCommandWalker((Node *) queryTree)) - { - /* No MERGE found */ - return; - } - - - /* - * In Citus we have limited support for MERGE, it's allowed - * only if all the tables(target, source or any CTE) tables - * are are local i.e. a combination of Citus local and Non-Citus - * tables (regular Postgres tables). 
- */ - ErrorIfMergeHasUnsupportedTables(queryTree, rangeTableList); -} - - -/* - * ContainsMergeCommandWalker walks over the node and finds if there are any - * Merge command (e.g., CMD_MERGE) in the node. - */ -static bool -ContainsMergeCommandWalker(Node *node) -{ - #if PG_VERSION_NUM < PG_VERSION_15 - return false; - #endif - - if (node == NULL) - { - return false; - } - - if (IsA(node, Query)) - { - Query *query = (Query *) node; - if (IsMergeQuery(query)) - { - return true; - } - - return query_tree_walker((Query *) node, ContainsMergeCommandWalker, NULL, 0); - } - - return expression_tree_walker(node, ContainsMergeCommandWalker, NULL); - - return false; -} - - /* * ExtractRangeTableEntryList is a wrapper around ExtractRangeTableEntryWalker. * The function traverses the input query and returns all the range table @@ -669,17 +608,6 @@ IsMultiTaskPlan(DistributedPlan *distributedPlan) } -/* - * IsUpdateOrDelete returns true if the query performs an update or delete. - */ -bool -IsUpdateOrDelete(Query *query) -{ - return query->commandType == CMD_UPDATE || - query->commandType == CMD_DELETE; -} - - /* * PlanFastPathDistributedStmt creates a distributed planned statement using * the FastPathPlanner. @@ -850,7 +778,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) * if it is planned as a multi shard modify query. */ if ((distributedPlan->planningError || - (IsUpdateOrDelete(planContext->originalQuery) && IsMultiTaskPlan( + (UpdateOrDeleteOrMergeQuery(planContext->originalQuery) && IsMultiTaskPlan( distributedPlan))) && hasUnresolvedParams) { @@ -955,6 +883,51 @@ TryCreateDistributedPlannedStmt(PlannedStmt *localPlan, } +/* + * GetRouterPlanType checks the parse tree to return appropriate plan type. + */ +static RouterPlanType +GetRouterPlanType(Query *query, Query *originalQuery, bool hasUnresolvedParams) +{ + if (!IsModifyCommand(originalQuery)) + { + return SELECT_QUERY; + } + + Oid targetRelationId = ModifyQueryResultRelationId(query); + + EnsureModificationsCanRunOnRelation(targetRelationId); + EnsurePartitionTableNotReplicated(targetRelationId); + + /* Check the type of modification being done */ + + if (InsertSelectIntoCitusTable(originalQuery)) + { + if (hasUnresolvedParams) + { + return REPLAN_WITH_BOUND_PARAMETERS; + } + return INSERT_SELECT_INTO_CITUS_TABLE; + } + else if (InsertSelectIntoLocalTable(originalQuery)) + { + if (hasUnresolvedParams) + { + return REPLAN_WITH_BOUND_PARAMETERS; + } + return INSERT_SELECT_INTO_LOCAL_TABLE; + } + else if (IsMergeQuery(originalQuery)) + { + return MERGE_QUERY; + } + else + { + return DML_QUERY; + } +} + + /* * CreateDistributedPlan generates a distributed plan for a query. 
* It goes through 3 steps: @@ -972,88 +945,83 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina DistributedPlan *distributedPlan = NULL; bool hasCtes = originalQuery->cteList != NIL; - if (IsModifyCommand(originalQuery)) + /* Step 1: Try router planner */ + + RouterPlanType routerPlan = GetRouterPlanType(query, originalQuery, + hasUnresolvedParams); + + switch (routerPlan) { - Oid targetRelationId = ModifyQueryResultRelationId(query); - - EnsureModificationsCanRunOnRelation(targetRelationId); - - EnsurePartitionTableNotReplicated(targetRelationId); - - if (InsertSelectIntoCitusTable(originalQuery)) + case INSERT_SELECT_INTO_CITUS_TABLE: { - if (hasUnresolvedParams) - { - /* - * Unresolved parameters can cause performance regressions in - * INSERT...SELECT when the partition column is a parameter - * because we don't perform any additional pruning in the executor. - */ - return NULL; - } - distributedPlan = - CreateInsertSelectPlan(planId, originalQuery, plannerRestrictionContext, + CreateInsertSelectPlan(planId, + originalQuery, + plannerRestrictionContext, boundParams); + break; } - else if (InsertSelectIntoLocalTable(originalQuery)) + + case INSERT_SELECT_INTO_LOCAL_TABLE: { - if (hasUnresolvedParams) - { - /* - * Unresolved parameters can cause performance regressions in - * INSERT...SELECT when the partition column is a parameter - * because we don't perform any additional pruning in the executor. - */ - return NULL; - } distributedPlan = - CreateInsertSelectIntoLocalTablePlan(planId, originalQuery, boundParams, + CreateInsertSelectIntoLocalTablePlan(planId, + originalQuery, + boundParams, hasUnresolvedParams, plannerRestrictionContext); + break; } - else + + case DML_QUERY: { /* modifications are always routed through the same planner/executor */ distributedPlan = CreateModifyPlan(originalQuery, query, plannerRestrictionContext); + break; } - /* the functions above always return a plan, possibly with an error */ - Assert(distributedPlan); + case MERGE_QUERY: + { + distributedPlan = + CreateMergePlan(originalQuery, query, plannerRestrictionContext); + break; + } - if (distributedPlan->planningError == NULL) + case REPLAN_WITH_BOUND_PARAMETERS: { - return distributedPlan; + /* + * Unresolved parameters can cause performance regressions in + * INSERT...SELECT when the partition column is a parameter + * because we don't perform any additional pruning in the executor. + */ + return NULL; } - else + + case SELECT_QUERY: { - RaiseDeferredError(distributedPlan->planningError, DEBUG2); + /* + * For select queries we, if router executor is enabled, first try to + * plan the query as a router query. If not supported, otherwise try + * the full blown plan/optimize/physical planning process needed to + * produce distributed query plans. + */ + distributedPlan = + CreateRouterPlan(originalQuery, query, plannerRestrictionContext); + break; } } + + /* the functions above always return a plan, possibly with an error */ + Assert(distributedPlan); + + if (distributedPlan->planningError == NULL) + { + return distributedPlan; + } else { - /* - * For select queries we, if router executor is enabled, first try to - * plan the query as a router query. If not supported, otherwise try - * the full blown plan/optimize/physical planning process needed to - * produce distributed query plans. 
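The new GetRouterPlanType collapses the previous nested if/else into one classification that CreateDistributedPlan switches over, with MERGE now getting its own branch. A hedged sketch of that dispatch order, with the planner's real predicates replaced by stubbed booleans and an illustrative QueryShape struct:

#include <stdbool.h>
#include <stdio.h>

typedef enum
{
    INSERT_SELECT_INTO_CITUS_TABLE,
    INSERT_SELECT_INTO_LOCAL_TABLE,
    DML_QUERY,
    SELECT_QUERY,
    MERGE_QUERY,
    REPLAN_WITH_BOUND_PARAMETERS
} RouterPlanType;

/* stand-ins for IsModifyCommand(), InsertSelectIntoCitusTable(), IsMergeQuery(), ... */
typedef struct
{
    bool isModify;
    bool insertSelectIntoCitusTable;
    bool insertSelectIntoLocalTable;
    bool isMerge;
    bool hasUnresolvedParams;
} QueryShape;

static RouterPlanType ClassifyQuery(QueryShape q)
{
    if (!q.isModify)
        return SELECT_QUERY;
    if (q.insertSelectIntoCitusTable)
        return q.hasUnresolvedParams ? REPLAN_WITH_BOUND_PARAMETERS
                                     : INSERT_SELECT_INTO_CITUS_TABLE;
    if (q.insertSelectIntoLocalTable)
        return q.hasUnresolvedParams ? REPLAN_WITH_BOUND_PARAMETERS
                                     : INSERT_SELECT_INTO_LOCAL_TABLE;
    if (q.isMerge)
        return MERGE_QUERY;
    return DML_QUERY;
}

int main(void)
{
    QueryShape merge = { .isModify = true, .isMerge = true };
    QueryShape plainSelect = { 0 };

    printf("merge  -> %d (MERGE_QUERY=%d)\n", ClassifyQuery(merge), MERGE_QUERY);
    printf("select -> %d (SELECT_QUERY=%d)\n", ClassifyQuery(plainSelect), SELECT_QUERY);
    return 0;
}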
- */ - - distributedPlan = CreateRouterPlan(originalQuery, query, - plannerRestrictionContext); - if (distributedPlan->planningError == NULL) - { - return distributedPlan; - } - else - { - /* - * For debugging it's useful to display why query was not - * router plannable. - */ - RaiseDeferredError(distributedPlan->planningError, DEBUG2); - } + RaiseDeferredError(distributedPlan->planningError, DEBUG2); } if (hasUnresolvedParams) @@ -1082,6 +1050,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina boundParams); Assert(originalQuery != NULL); + /* Step 2: Generate subplans for CTEs and complex subqueries */ + /* * Plan subqueries and CTEs that cannot be pushed down by recursively * calling the planner and return the resulting plans to subPlanList. @@ -1182,6 +1152,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina query->cteList = NIL; Assert(originalQuery->cteList == NIL); + /* Step 3: Try Logical planner */ + MultiTreeRoot *logicalPlan = MultiLogicalPlanCreate(originalQuery, query, plannerRestrictionContext); MultiLogicalPlanOptimize(logicalPlan); @@ -2611,148 +2583,3 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList) } } } - - -/* - * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is - * permitted on special relations, such as materialized view, returns true only if - * it's a "source" relation. - */ -bool -IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) -{ - if (!IsMergeQuery(parse)) - { - return false; - } - - RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); - - /* Is it a target relation? */ - if (targetRte->relid == rte->relid) - { - return false; - } - - return true; -} - - -/* - * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE - * present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus - * tables (regular Postgres tables), raises an exception for all other combinations. - */ -static void -ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList) -{ - ListCell *tableCell = NULL; - - foreach(tableCell, rangeTableList) - { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(tableCell); - Oid relationId = rangeTableEntry->relid; - - switch (rangeTableEntry->rtekind) - { - case RTE_RELATION: - { - /* Check the relation type */ - break; - } - - case RTE_SUBQUERY: - case RTE_FUNCTION: - case RTE_TABLEFUNC: - case RTE_VALUES: - case RTE_JOIN: - case RTE_CTE: - { - /* Skip them as base table(s) will be checked */ - continue; - } - - /* - * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, - * such as, trigger data; until we find a genuine use case, raise an - * exception. - * RTE_RESULT is a node added by the planner and we shouldn't - * encounter it in the parse tree. 
- */ - case RTE_NAMEDTUPLESTORE: - case RTE_RESULT: - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not supported with " - "Tuplestores and results"))); - break; - } - - default: - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command: Unrecognized range table entry."))); - } - } - - /* RTE Relation can be of various types, check them now */ - - /* skip the regular views as they are replaced with subqueries */ - if (rangeTableEntry->relkind == RELKIND_VIEW) - { - continue; - } - - if (rangeTableEntry->relkind == RELKIND_MATVIEW || - rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) - { - /* Materialized view or Foreign table as target is not allowed */ - if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) - { - /* Non target relation is ok */ - continue; - } - else - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not allowed " - "on materialized view"))); - } - } - - if (rangeTableEntry->relkind != RELKIND_RELATION && - rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Unexpected relation type(relkind:%c) in MERGE command", - rangeTableEntry->relkind))); - } - - Assert(rangeTableEntry->relid != 0); - - /* Distributed tables and Reference tables are not supported yet */ - if (IsCitusTableType(relationId, REFERENCE_TABLE) || - IsCitusTableType(relationId, DISTRIBUTED_TABLE)) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("MERGE command is not supported on " - "distributed/reference tables yet"))); - } - - /* Regular Postgres tables and Citus local tables are allowed */ - if (!IsCitusTable(relationId) || - IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) - { - continue; - } - - - /* Any other Citus table type missing ? 
*/ - } - - /* All the tables are local, supported */ -} diff --git a/src/backend/distributed/planner/fast_path_router_planner.c b/src/backend/distributed/planner/fast_path_router_planner.c index aa029f3c0..ecb62478a 100644 --- a/src/backend/distributed/planner/fast_path_router_planner.c +++ b/src/backend/distributed/planner/fast_path_router_planner.c @@ -54,10 +54,11 @@ bool EnableFastPathRouterPlanner = true; static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey); -static bool ConjunctionContainsColumnFilter(Node *node, Var *column, - Node **distributionKeyValue); static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn, Node **distributionKeyValue); +static bool ConjunctionContainsColumnFilter(Node *node, + Var *column, + Node **distributionKeyValue); /* diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c index 21fd13800..e58970fbd 100644 --- a/src/backend/distributed/planner/insert_select_planner.c +++ b/src/backend/distributed/planner/insert_select_planner.c @@ -875,7 +875,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery, &prunedShardIntervalListList, replacePrunedQueryWithDummy, &multiShardModifyQuery, NULL, - false); + NULL); Assert(!multiShardModifyQuery); @@ -938,6 +938,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery, modifyTask->taskPlacementList = insertShardPlacementList; modifyTask->relationShardList = relationShardList; modifyTask->replicationModel = targetTableCacheEntry->replicationModel; + modifyTask->isLocalTableModification = false; return modifyTask; } diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c new file mode 100644 index 000000000..c67095624 --- /dev/null +++ b/src/backend/distributed/planner/merge_planner.c @@ -0,0 +1,738 @@ +/*------------------------------------------------------------------------- + * + * merge_planner.c + * + * This file contains functions to help plan MERGE queries. + * + * Copyright (c) Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#include + +#include "postgres.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" + +#include "distributed/citus_clauses.h" +#include "distributed/listutils.h" +#include "distributed/merge_planner.h" +#include "distributed/multi_logical_optimizer.h" +#include "distributed/multi_router_planner.h" +#include "distributed/pg_version_constants.h" +#include "distributed/query_pushdown_planning.h" + +#if PG_VERSION_NUM >= PG_VERSION_15 + +static DeferredErrorMessage * CheckIfRTETypeIsUnsupported(Query *parse, + RangeTblEntry *rangeTableEntry); +static DeferredErrorMessage * ErrorIfDistTablesNotColocated(Query *parse, + List * + distTablesList, + PlannerRestrictionContext + * + plannerRestrictionContext); +static DeferredErrorMessage * ErrorIfMergeHasUnsupportedTables(Query *parse, + List *rangeTableList, + PlannerRestrictionContext * + restrictionContext); +static bool IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool + skipOuterVars); +static DeferredErrorMessage * InsertDistributionColumnMatchesSource(Query *query, + RangeTblEntry * + resultRte); + +static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid + resultRelationId, + FromExpr *joinTree, + Node *quals, + List *targetList, + CmdType commandType); +#endif + + +/* + * CreateMergePlan attempts to create a plan for the given MERGE SQL + * statement. If planning fails ->planningError is set to a description + * of the failure. + */ +DistributedPlan * +CreateMergePlan(Query *originalQuery, Query *query, + PlannerRestrictionContext *plannerRestrictionContext) +{ + DistributedPlan *distributedPlan = CitusMakeNode(DistributedPlan); + bool multiShardQuery = false; + + Assert(originalQuery->commandType == CMD_MERGE); + + distributedPlan->modLevel = RowModifyLevelForQuery(query); + + distributedPlan->planningError = MergeQuerySupported(originalQuery, + multiShardQuery, + plannerRestrictionContext); + + if (distributedPlan->planningError != NULL) + { + return distributedPlan; + } + + Job *job = RouterJob(originalQuery, plannerRestrictionContext, + &distributedPlan->planningError); + + if (distributedPlan->planningError != NULL) + { + return distributedPlan; + } + + ereport(DEBUG1, (errmsg("Creating MERGE router plan"))); + + distributedPlan->workerJob = job; + distributedPlan->combineQuery = NULL; + + /* MERGE doesn't support RETURNING clause */ + distributedPlan->expectResults = false; + distributedPlan->targetRelationId = ResultRelationOidForQuery(query); + + distributedPlan->fastPathRouterPlan = + plannerRestrictionContext->fastPathRestrictionContext->fastPathRouterQuery; + + return distributedPlan; +} + + +/* + * MergeQuerySupported does check for a MERGE command in the query, if it finds + * one, it will verify the below criteria + * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables + * - Distributed tables requirements in ErrorIfDistTablesNotColocated + * - Checks target-lists and functions-in-quals in TargetlistAndFunctionsSupported + */ +DeferredErrorMessage * +MergeQuerySupported(Query *originalQuery, bool multiShardQuery, + PlannerRestrictionContext *plannerRestrictionContext) +{ + /* function is void for pre-15 versions of Postgres */ + #if PG_VERSION_NUM < PG_VERSION_15 + + return NULL; + + #else + + /* + * TODO: For now, we are adding an exception where any volatile or stable + * functions are not allowed in 
the MERGE query, but this will become too + * restrictive as this will prevent many useful and simple cases, such as, + * INSERT VALUES(ts::timestamp), bigserial column inserts etc. But without + * this restriction, we have a potential danger of some of the function(s) + * getting executed at the worker which will result in incorrect behavior. + */ + if (contain_mutable_functions((Node *) originalQuery)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not yet supported " + "in MERGE sql with distributed tables ", + NULL, NULL); + } + + List *rangeTableList = ExtractRangeTableEntryList(originalQuery); + RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery); + + /* + * Fast path queries cannot have merge command, and we prevent the remaining here. + * In Citus we have limited support for MERGE, it's allowed only if all + * the tables(target, source or any CTE) tables are are local i.e. a + * combination of Citus local and Non-Citus tables (regular Postgres tables) + * or distributed tables with some restrictions, please see header of routine + * ErrorIfDistTablesNotColocated for details. + */ + DeferredErrorMessage *deferredError = + ErrorIfMergeHasUnsupportedTables(originalQuery, + rangeTableList, + plannerRestrictionContext); + if (deferredError) + { + /* MERGE's unsupported combination, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + + Oid resultRelationId = resultRte->relid; + deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId, + originalQuery->jointree, + originalQuery->jointree-> + quals, + originalQuery->targetList, + originalQuery->commandType); + if (deferredError) + { + return deferredError; + } + + /* + * MERGE is a special case where we have multiple modify statements + * within itself. Check each INSERT/UPDATE/DELETE individually. + */ + MergeAction *action = NULL; + foreach_ptr(action, originalQuery->mergeActionList) + { + Assert(originalQuery->returningList == NULL); + deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId, + originalQuery->jointree, + action->qual, + action->targetList, + action->commandType); + if (deferredError) + { + /* MERGE's unsupported scenario, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + } + + deferredError = + InsertDistributionColumnMatchesSource(originalQuery, resultRte); + if (deferredError) + { + /* MERGE's unsupported scenario, raise the exception */ + RaiseDeferredError(deferredError, ERROR); + } + + if (multiShardQuery) + { + deferredError = + DeferErrorIfUnsupportedSubqueryPushdown(originalQuery, + plannerRestrictionContext); + if (deferredError) + { + return deferredError; + } + } + + if (HasDangerousJoinUsing(originalQuery->rtable, (Node *) originalQuery->jointree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "a join with USING causes an internal naming " + "conflict, use ON instead", NULL, NULL); + } + + return NULL; + + #endif +} + + +/* + * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is + * permitted on special relations, such as materialized view, returns true only if + * it's a "source" relation. + */ +bool +IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte) +{ + if (!IsMergeQuery(parse)) + { + return false; + } + + /* Fetch the MERGE target relation */ + RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable); + + /* Is it a target relation? 
*/ + if (targetRte->relid == rte->relid) + { + return false; + } + + return true; +} + + +#if PG_VERSION_NUM >= PG_VERSION_15 + +/* + * ErrorIfDistTablesNotColocated Checks to see if + * + * - There are a minimum of two distributed tables (source and a target). + * - All the distributed tables are indeed colocated. + * + * If any of the conditions are not met, it raises an exception. + */ +static DeferredErrorMessage * +ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList, + PlannerRestrictionContext * + plannerRestrictionContext) +{ + /* All MERGE tables must be distributed */ + if (list_length(distTablesList) < 2) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, both the source and target " + "must be distributed", NULL, NULL); + } + + /* All distributed tables must be colocated */ + if (!AllDistributedRelationsInRTEListColocated(distTablesList)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, all the distributed tables " + "must be colocated", NULL, NULL); + } + + return NULL; +} + + +/* + * ErrorIfRTETypeIsUnsupported Checks for types of tables that are not supported, such + * as, reference tables, append-distributed tables and materialized view as target relation. + * Routine returns NULL for the supported types, error message for everything else. + */ +static DeferredErrorMessage * +CheckIfRTETypeIsUnsupported(Query *parse, RangeTblEntry *rangeTableEntry) +{ + if (rangeTableEntry->relkind == RELKIND_MATVIEW || + rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE) + { + /* Materialized view or Foreign table as target is not allowed */ + if (IsMergeAllowedOnRelation(parse, rangeTableEntry)) + { + /* Non target relation is ok */ + return NULL; + } + else + { + /* Usually we don't reach this exception as the Postgres parser catches it */ + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "MERGE command is not allowed on " + "relation type(relkind:%c)", + rangeTableEntry->relkind); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + } + + if (rangeTableEntry->relkind != RELKIND_RELATION && + rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE) + { + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) " + "in MERGE command", rangeTableEntry->relkind); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + + Assert(rangeTableEntry->relid != 0); + + /* Reference tables are not supported yet */ + if (IsCitusTableType(rangeTableEntry->relid, REFERENCE_TABLE)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported on reference " + "tables yet", NULL, NULL); + } + + /* Append/Range tables are not supported */ + if (IsCitusTableType(rangeTableEntry->relid, APPEND_DISTRIBUTED) || + IsCitusTableType(rangeTableEntry->relid, RANGE_DISTRIBUTED)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "For MERGE command, all the distributed tables " + "must be colocated, for append/range distribution, " + "colocation is not supported", NULL, + "Consider using hash distribution instead"); + } + + return NULL; +} + + +/* + * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE + * present) in the MERGE command are local i.e. 
a combination of Citus local and Non-Citus + * tables (regular Postgres tables), or distributed tables with some restrictions, please + * see header of routine ErrorIfDistTablesNotColocated for details, raises an exception + * for all other combinations. + */ +static DeferredErrorMessage * +ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList, + PlannerRestrictionContext *restrictionContext) +{ + List *distTablesList = NIL; + bool foundLocalTables = false; + + RangeTblEntry *rangeTableEntry = NULL; + foreach_ptr(rangeTableEntry, rangeTableList) + { + Oid relationId = rangeTableEntry->relid; + + switch (rangeTableEntry->rtekind) + { + case RTE_RELATION: + { + /* Check the relation type */ + break; + } + + case RTE_SUBQUERY: + case RTE_FUNCTION: + case RTE_TABLEFUNC: + case RTE_VALUES: + case RTE_JOIN: + case RTE_CTE: + { + /* Skip them as base table(s) will be checked */ + continue; + } + + /* + * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations, + * such as, trigger data; until we find a genuine use case, raise an + * exception. + * RTE_RESULT is a node added by the planner and we shouldn't + * encounter it in the parse tree. + */ + case RTE_NAMEDTUPLESTORE: + case RTE_RESULT: + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported with " + "Tuplestores and results", + NULL, NULL); + } + + default: + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command: Unrecognized range table entry.", + NULL, NULL); + } + } + + /* RTE Relation can be of various types, check them now */ + + /* skip the regular views as they are replaced with subqueries */ + if (rangeTableEntry->relkind == RELKIND_VIEW) + { + continue; + } + + DeferredErrorMessage *errorMessage = + CheckIfRTETypeIsUnsupported(parse, rangeTableEntry); + if (errorMessage) + { + return errorMessage; + } + + /* + * For now, save all distributed tables, later (below) we will + * check for supported combination(s). + */ + if (IsCitusTableType(relationId, DISTRIBUTED_TABLE)) + { + distTablesList = lappend(distTablesList, rangeTableEntry); + continue; + } + + /* Regular Postgres tables and Citus local tables are allowed */ + if (!IsCitusTable(relationId) || + IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) + { + foundLocalTables = true; + continue; + } + + /* Any other Citus table type missing ? */ + } + + /* Ensure all tables are indeed local */ + if (foundLocalTables && list_length(distTablesList) == 0) + { + /* All the tables are local, supported */ + return NULL; + } + else if (foundLocalTables && list_length(distTablesList) > 0) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE command is not supported with " + "combination of distributed/local tables yet", + NULL, NULL); + } + + /* Ensure all distributed tables are indeed co-located */ + return ErrorIfDistTablesNotColocated(parse, + distTablesList, + restrictionContext); +} + + +/* + * IsPartitionColumnInMerge returns true if the given column is a partition column. + * The function uses FindReferencedTableColumn to find the original relation + * id and column that the column expression refers to. It then checks whether + * that column is a partition column of the relation. + * + * Also, the function returns always false for reference tables given that + * reference tables do not have partition column. + * + * If skipOuterVars is true, then it doesn't process the outervars. 
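Taken together, ErrorIfMergeHasUnsupportedTables and ErrorIfDistTablesNotColocated reduce to a simple rule: either every table in the MERGE is local (a regular Postgres table or a Citus local table), or every table is distributed and all of the distributed tables belong to one colocation group; mixing the two kinds, or spanning more than one colocation group, is rejected. The following is a minimal, self-contained sketch of that decision, using hypothetical stand-in types (TableKind, colocationId) rather than the real Citus cache entries.

#include <stdbool.h>
#include <stdio.h>

/* simplified stand-ins for the real Citus metadata; hypothetical types */
typedef enum { LOCAL_TABLE, DISTRIBUTED_TABLE_KIND } TableKind;

typedef struct MergeTable
{
	TableKind kind;
	int colocationId;    /* only meaningful for distributed tables */
} MergeTable;

/*
 * Mirrors the high-level rule enforced above: all-local is fine,
 * all-distributed is fine only when every table shares one colocation id,
 * and mixing local with distributed tables is rejected.
 */
static const char *
ClassifyMergeTables(const MergeTable *tables, int count)
{
	int distributedCount = 0;
	bool foundLocal = false;
	int firstColocationId = -1;

	for (int i = 0; i < count; i++)
	{
		if (tables[i].kind == LOCAL_TABLE)
		{
			foundLocal = true;
			continue;
		}

		distributedCount++;
		if (firstColocationId == -1)
		{
			firstColocationId = tables[i].colocationId;
		}
		else if (tables[i].colocationId != firstColocationId)
		{
			return "ERROR: all the distributed tables must be colocated";
		}
	}

	if (foundLocal && distributedCount > 0)
	{
		return "ERROR: combination of distributed/local tables is not supported";
	}
	if (distributedCount == 1)
	{
		return "ERROR: both the source and target must be distributed";
	}

	return "supported";
}

int
main(void)
{
	MergeTable colocated[] = { { DISTRIBUTED_TABLE_KIND, 7 }, { DISTRIBUTED_TABLE_KIND, 7 } };
	MergeTable mixed[] = { { DISTRIBUTED_TABLE_KIND, 7 }, { LOCAL_TABLE, 0 } };

	printf("%s\n", ClassifyMergeTables(colocated, 2));   /* supported */
	printf("%s\n", ClassifyMergeTables(mixed, 2));       /* rejected */
	return 0;
}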
+ */ +bool +IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool + skipOuterVars) +{ + bool isDistributionColumn = false; + Var *column = NULL; + RangeTblEntry *relationRTE = NULL; + + /* ParentQueryList is same as the original query for MERGE */ + FindReferencedTableColumn(columnExpression, list_make1(query), query, &column, + &relationRTE, + skipOuterVars); + Oid relationId = relationRTE ? relationRTE->relid : InvalidOid; + if (relationId != InvalidOid && column != NULL) + { + Var *distributionColumn = DistPartitionKey(relationId); + + /* not all distributed tables have partition column */ + if (distributionColumn != NULL && column->varattno == + distributionColumn->varattno) + { + isDistributionColumn = true; + } + } + + return isDistributionColumn; +} + + +/* + * InsertDistributionColumnMatchesSource check to see if MERGE is inserting a + * value into the target which is not from the source table, if so, it + * raises an exception. + * Note: Inserting random values other than the joined column values will + * result in unexpected behaviour of rows ending up in incorrect shards, to + * prevent such mishaps, we disallow such inserts here. + */ +static DeferredErrorMessage * +InsertDistributionColumnMatchesSource(Query *query, RangeTblEntry *resultRte) +{ + Assert(IsMergeQuery(query)); + + if (!IsCitusTableType(resultRte->relid, DISTRIBUTED_TABLE)) + { + return NULL; + } + + bool foundDistributionColumn = false; + MergeAction *action = NULL; + foreach_ptr(action, query->mergeActionList) + { + /* Skip MATCHED clause as INSERTS are not allowed in it*/ + if (action->matched) + { + continue; + } + + /* NOT MATCHED can have either INSERT or DO NOTHING */ + if (action->commandType == CMD_NOTHING) + { + return NULL; + } + + if (action->targetList == NIL) + { + /* INSERT DEFAULT VALUES is not allowed */ + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot perform MERGE INSERT with DEFAULTS", + NULL, NULL); + } + + Assert(action->commandType == CMD_INSERT); + Var *targetKey = PartitionColumn(resultRte->relid, 1); + + TargetEntry *targetEntry = NULL; + foreach_ptr(targetEntry, action->targetList) + { + AttrNumber originalAttrNo = targetEntry->resno; + + /* skip processing of target table non-partition columns */ + if (originalAttrNo != targetKey->varattno) + { + continue; + } + + foundDistributionColumn = true; + + if (IsA(targetEntry->expr, Var)) + { + if (IsDistributionColumnInMergeSource(targetEntry->expr, query, true)) + { + return NULL; + } + else + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must use the source table " + "distribution column value", + NULL, NULL); + } + } + else + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must refer a source column " + "for distribution column ", + NULL, NULL); + } + } + + if (!foundDistributionColumn) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "MERGE INSERT must have distribution column as value", + NULL, NULL); + } + } + + return NULL; +} + + +/* + * MergeQualAndTargetListFunctionsSupported Checks WHEN/ON clause actions to see what functions + * are allowed, if we are updating distribution column, etc. 
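InsertDistributionColumnMatchesSource guards routing correctness: the MERGE task is routed using the join between the source and target distribution columns, so if a NOT MATCHED ... THEN INSERT supplied some other value for the target's distribution column, the new row would be written to the shard the router chose for the join key rather than the shard its own value maps to. A minimal sketch of that mismatch, using a made-up hash-modulo placement (real hash-distributed tables use PostgreSQL's hash functions and shard hash ranges, not a plain modulo):

#include <stdio.h>
#include <stdint.h>

#define SHARD_COUNT 4

/* toy hash; stands in for the real distribution hashing */
static uint32_t
ToyHash(uint32_t value)
{
	value ^= value >> 16;
	value *= 2654435761u;
	return value;
}

static int
ShardFor(uint32_t distributionValue)
{
	return (int) (ToyHash(distributionValue) % SHARD_COUNT);
}

int
main(void)
{
	uint32_t sourceJoinKey = 42;    /* value the router prunes shards with */
	uint32_t insertedValue = 7;     /* some other expression in the INSERT */

	int routedShard = ShardFor(sourceJoinKey);
	int correctShard = ShardFor(insertedValue);

	printf("task routed to shard %d, row belongs on shard %d\n",
		   routedShard, correctShard);

	if (routedShard != correctShard)
	{
		/* the row would land on the wrong shard, hence the restriction above */
		printf("=> MERGE INSERT must use the source distribution column value\n");
	}
	return 0;
}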
+ */ +static DeferredErrorMessage * +MergeQualAndTargetListFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, + Node *quals, + List *targetList, CmdType commandType) +{ + uint32 rangeTableId = 1; + Var *distributionColumn = NULL; + if (IsCitusTable(resultRelationId) && HasDistributionKey(resultRelationId)) + { + distributionColumn = PartitionColumn(resultRelationId, rangeTableId); + } + + ListCell *targetEntryCell = NULL; + bool hasVarArgument = false; /* A STABLE function is passed a Var argument */ + bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */ + foreach(targetEntryCell, targetList) + { + TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); + + bool targetEntryDistributionColumn = false; + AttrNumber targetColumnAttrNumber = InvalidAttrNumber; + + if (distributionColumn) + { + if (commandType == CMD_UPDATE) + { + /* + * Note that it is not possible to give an alias to + * UPDATE table SET ... + */ + if (targetEntry->resname) + { + targetColumnAttrNumber = get_attnum(resultRelationId, + targetEntry->resname); + if (targetColumnAttrNumber == distributionColumn->varattno) + { + targetEntryDistributionColumn = true; + } + } + } + } + + if (targetEntryDistributionColumn && + TargetEntryChangesValue(targetEntry, distributionColumn, joinTree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "updating the distribution column is not " + "allowed in MERGE actions", + NULL, NULL); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + CitusIsVolatileFunction)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "functions used in MERGE actions on distributed " + "tables must not be VOLATILE", + NULL, NULL); + } + + if (MasterIrreducibleExpression((Node *) targetEntry->expr, + &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + NodeIsFieldStore)) + { + /* DELETE cannot do field indirection already */ + Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "inserting or modifying composite type fields is not " + "supported", NULL, + "Use the column name to insert or update the composite " + "type as a single value"); + } + } + + + /* + * Check the condition, convert list of expressions into expression tree for further processing + */ + if (quals) + { + if (IsA(quals, List)) + { + quals = (Node *) make_ands_explicit((List *) quals); + } + + if (FindNodeMatchingCheckFunction((Node *) quals, CitusIsVolatileFunction)) + { + StringInfo errorMessage = makeStringInfo(); + appendStringInfo(errorMessage, "functions used in the %s clause of MERGE " + "queries on distributed tables must not be VOLATILE", + (commandType == CMD_MERGE) ? 
"ON" : "WHEN"); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + errorMessage->data, NULL, NULL); + } + else if (MasterIrreducibleExpression(quals, &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + } + + if (hasVarArgument) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "STABLE functions used in MERGE queries " + "cannot be called with column references", + NULL, NULL); + } + + if (hasBadCoalesce) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not allowed in CASE or " + "COALESCE statements", + NULL, NULL); + } + + if (quals != NULL && nodeTag(quals) == T_CurrentOfExpr) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot run MERGE actions with cursors", + NULL, NULL); + } + + return NULL; +} + + +#endif diff --git a/src/backend/distributed/planner/multi_explain.c b/src/backend/distributed/planner/multi_explain.c index d7766a913..c23509df1 100644 --- a/src/backend/distributed/planner/multi_explain.c +++ b/src/backend/distributed/planner/multi_explain.c @@ -29,6 +29,7 @@ #include "distributed/citus_nodefuncs.h" #include "distributed/connection_management.h" #include "distributed/deparse_shard_query.h" +#include "distributed/executor_util.h" #include "distributed/insert_select_planner.h" #include "distributed/insert_select_executor.h" #include "distributed/listutils.h" @@ -199,20 +200,6 @@ CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es return; } - /* - * ALTER TABLE statements are not explained by postgres. However ALTER TABLE statements - * may trigger SELECT statements causing explain hook to run. This situation causes a crash in a worker. - * Therefore we will detect if we are explaining a triggered query when we are processing - * an ALTER TABLE statement and stop explain in this situation. 
- */ - if (AlterTableInProgress()) - { - ExplainPropertyText("Citus Explain Scan", - "Explain for triggered constraint validation queries during ALTER TABLE commands are not supported by Citus", - es); - return; - } - ExplainOpenGroup("Distributed Query", "Distributed Query", true, es); /* diff --git a/src/backend/distributed/planner/multi_join_order.c b/src/backend/distributed/planner/multi_join_order.c index 9b2342b20..b1195c664 100644 --- a/src/backend/distributed/planner/multi_join_order.c +++ b/src/backend/distributed/planner/multi_join_order.c @@ -1383,7 +1383,7 @@ DistPartitionKey(Oid relationId) CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId); /* non-distributed tables do not have partition column */ - if (IsCitusTableTypeCacheEntry(partitionEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(partitionEntry)) { return NULL; } diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 19b4aea4d..851afc4b6 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -3385,6 +3385,13 @@ GetAggregateType(Aggref *aggregateExpression) { Oid aggFunctionId = aggregateExpression->aggfnoid; + /* custom aggregates with combine func take precedence over name-based logic */ + if (aggFunctionId >= FirstNormalObjectId && + AggregateEnabledCustom(aggregateExpression)) + { + return AGGREGATE_CUSTOM_COMBINE; + } + /* look up the function name */ char *aggregateProcName = get_func_name(aggFunctionId); if (aggregateProcName == NULL) @@ -3395,8 +3402,6 @@ GetAggregateType(Aggref *aggregateExpression) uint32 aggregateCount = lengthof(AggregateNames); - Assert(AGGREGATE_INVALID_FIRST == 0); - for (uint32 aggregateIndex = 1; aggregateIndex < aggregateCount; aggregateIndex++) { const char *aggregateName = AggregateNames[aggregateIndex]; @@ -3465,7 +3470,7 @@ GetAggregateType(Aggref *aggregateExpression) } } - + /* handle any remaining built-in aggregates with a suitable combinefn */ if (AggregateEnabledCustom(aggregateExpression)) { return AGGREGATE_CUSTOM_COMBINE; diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 7e665b567..d9322bf5e 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -228,7 +228,7 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList) * If the expression belongs to a non-distributed table continue searching for * other partition keys. 
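The reordering in GetAggregateType above classifies user-defined aggregates (OIDs at or above FirstNormalObjectId) that have a combine function as AGGREGATE_CUSTOM_COMBINE before any name-based matching runs, so a custom aggregate that happens to share a built-in's name is no longer treated as that built-in. A self-contained sketch of that precedence, with a simplified name check and hypothetical OID values (FirstNormalObjectId is 16384 in PostgreSQL):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define FIRST_NORMAL_OBJECT_ID 16384   /* PostgreSQL's FirstNormalObjectId */

typedef enum
{
	AGGREGATE_SUM,
	AGGREGATE_CUSTOM_COMBINE,
	AGGREGATE_UNKNOWN
} AggregateKind;

/*
 * Mirrors the new ordering: a user-defined aggregate with a combine
 * function wins before the name lookup; built-in OIDs still go through
 * the (here heavily simplified) name-based classification.
 */
static AggregateKind
ClassifyAggregate(unsigned int aggregateOid, bool hasCombineFunction,
				  const char *aggregateName)
{
	if (aggregateOid >= FIRST_NORMAL_OBJECT_ID && hasCombineFunction)
	{
		return AGGREGATE_CUSTOM_COMBINE;
	}

	if (strcmp(aggregateName, "sum") == 0)
	{
		return AGGREGATE_SUM;
	}

	/* remaining aggregates with a suitable combine function */
	if (hasCombineFunction)
	{
		return AGGREGATE_CUSTOM_COMBINE;
	}

	return AGGREGATE_UNKNOWN;
}

int
main(void)
{
	/* a user-defined aggregate that happens to be called "sum" */
	printf("custom sum  -> %d\n", ClassifyAggregate(90001, true, "sum"));

	/* an OID below FirstNormalObjectId stands in for the built-in sum() */
	printf("builtin sum -> %d\n", ClassifyAggregate(2108, true, "sum"));
	return 0;
}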
*/ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { continue; } diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 901e9de17..f488a1cd5 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -2199,7 +2199,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId, Oid relationId = relationRestriction->relationId; CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { continue; } @@ -2377,7 +2377,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) nonReferenceRelations = lappend_oid(nonReferenceRelations, relationId); } - else if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { /* do not need to handle non-distributed tables */ continue; @@ -2482,7 +2482,7 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex, ShardInterval *shardInterval = NULL; CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* non-distributed tables have only one shard */ shardInterval = cacheEntry->sortedShardIntervalArray[0]; @@ -3697,7 +3697,7 @@ PartitionedOnColumn(Var *column, List *rangeTableList, List *dependentJobList) Var *partitionColumn = PartitionColumn(relationId, rangeTableId); /* non-distributed tables do not have partition columns */ - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { return false; } @@ -4573,7 +4573,8 @@ RowModifyLevelForQuery(Query *query) } if (commandType == CMD_UPDATE || - commandType == CMD_DELETE) + commandType == CMD_DELETE || + commandType == CMD_MERGE) { return ROW_MODIFY_NONCOMMUTATIVE; } @@ -5343,8 +5344,7 @@ ActiveShardPlacementLists(List *taskList) /* - * CompareShardPlacements compares two shard placements by their tuple oid; this - * oid reflects the tuple's insertion order into pg_dist_placement. + * CompareShardPlacements compares two shard placements by placement id. */ int CompareShardPlacements(const void *leftElement, const void *rightElement) @@ -5370,6 +5370,35 @@ CompareShardPlacements(const void *leftElement, const void *rightElement) } +/* + * CompareGroupShardPlacements compares two group shard placements by placement id. + */ +int +CompareGroupShardPlacements(const void *leftElement, const void *rightElement) +{ + const GroupShardPlacement *leftPlacement = + *((const GroupShardPlacement **) leftElement); + const GroupShardPlacement *rightPlacement = + *((const GroupShardPlacement **) rightElement); + + uint64 leftPlacementId = leftPlacement->placementId; + uint64 rightPlacementId = rightPlacement->placementId; + + if (leftPlacementId < rightPlacementId) + { + return -1; + } + else if (leftPlacementId > rightPlacementId) + { + return 1; + } + else + { + return 0; + } +} + + /* * LeftRotateList returns a copy of the given list that has been cyclically * shifted to the left by the given rotation count. 
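CompareGroupShardPlacements above follows the usual qsort-style contract: return negative, zero, or positive depending on the placement ids, using explicit comparisons rather than subtraction so that 64-bit ids cannot overflow the int return value. A self-contained sketch of the same comparator shape over a simplified placement struct (the real GroupShardPlacement carries more fields, and the sorted array holds pointers to the structs):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* simplified stand-in for GroupShardPlacement */
typedef struct Placement
{
	uint64_t placementId;
} Placement;

/*
 * Same shape as CompareGroupShardPlacements: the array elements are
 * pointers to placements, and the ids are compared explicitly instead
 * of subtracted, since a uint64 difference does not fit in int.
 */
static int
ComparePlacements(const void *leftElement, const void *rightElement)
{
	const Placement *left = *((const Placement **) leftElement);
	const Placement *right = *((const Placement **) rightElement);

	if (left->placementId < right->placementId)
	{
		return -1;
	}
	else if (left->placementId > right->placementId)
	{
		return 1;
	}
	return 0;
}

int
main(void)
{
	Placement a = { 300 }, b = { 100 }, c = { 200 };
	Placement *placements[] = { &a, &b, &c };

	qsort(placements, 3, sizeof(Placement *), ComparePlacements);

	for (int i = 0; i < 3; i++)
	{
		printf("%llu\n", (unsigned long long) placements[i]->placementId);
	}
	return 0;
}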
For this, the function diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index ebd909074..c3677bb1a 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -28,11 +28,13 @@ #include "distributed/deparse_shard_query.h" #include "distributed/distribution_column.h" #include "distributed/errormessage.h" +#include "distributed/executor_util.h" #include "distributed/log_utils.h" #include "distributed/insert_select_planner.h" #include "distributed/intermediate_result_pruning.h" #include "distributed/metadata_utility.h" #include "distributed/coordinator_protocol.h" +#include "distributed/merge_planner.h" #include "distributed/metadata_cache.h" #include "distributed/multi_executor.h" #include "distributed/multi_join_order.h" @@ -113,6 +115,7 @@ typedef struct WalkerState } WalkerState; bool EnableRouterExecution = true; +bool EnableNonColocatedRouterQueryPushdown = false; /* planner functions forward declarations */ @@ -121,34 +124,24 @@ static void CreateSingleTaskRouterSelectPlan(DistributedPlan *distributedPlan, Query *query, PlannerRestrictionContext * plannerRestrictionContext); -static Oid ResultRelationOidForQuery(Query *query); static bool IsTidColumn(Node *node); static DeferredErrorMessage * ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery, Oid *distributedTableId); -static bool NodeIsFieldStore(Node *node); -static DeferredErrorMessage * MultiShardUpdateDeleteMergeSupported(Query *originalQuery, - PlannerRestrictionContext - * - plannerRestrictionContext); +static DeferredErrorMessage * MultiShardUpdateDeleteSupported(Query *originalQuery, + PlannerRestrictionContext + * + plannerRestrictionContext); static DeferredErrorMessage * SingleShardUpdateDeleteSupported(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext); -static bool HasDangerousJoinUsing(List *rtableList, Node *jtnode); -static bool MasterIrreducibleExpression(Node *expression, bool *varArgument, - bool *badCoalesce); static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state); static bool MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context); -static bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, - FromExpr *joinTree); static Job * RouterInsertJob(Query *originalQuery); static void ErrorIfNoShardsExist(CitusTableCacheEntry *cacheEntry); static DeferredErrorMessage * DeferErrorIfModifyView(Query *queryTree); static Job * CreateJob(Query *query); static Task * CreateTask(TaskType taskType); -static Job * RouterJob(Query *originalQuery, - PlannerRestrictionContext *plannerRestrictionContext, - DeferredErrorMessage **planningError); static bool RelationPrunesToMultipleShards(List *relationShardList); static void NormalizeMultiRowInsertTargetList(Query *query); static void AppendNextDummyColReference(Alias *expendedReferenceNames); @@ -445,7 +438,7 @@ ModifyQueryResultRelationId(Query *query) * ResultRelationOidForQuery returns the OID of the relation this is modified * by a given query. */ -static Oid +Oid ResultRelationOidForQuery(Query *query) { RangeTblEntry *resultRTE = rt_fetch(query->resultRelation, query->rtable); @@ -512,6 +505,161 @@ IsTidColumn(Node *node) } +/* + * TargetlistAndFunctionsSupported implements a subset of what ModifyPartialQuerySupported + * checks, that subset being checking what functions are allowed, if we are + * updating distribution column, etc. 
+ * Note: This subset of checks are repeated for each MERGE modify action. + */ +DeferredErrorMessage * +TargetlistAndFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, Node *quals, + List *targetList, + CmdType commandType, List *returningList) +{ + uint32 rangeTableId = 1; + Var *partitionColumn = NULL; + + if (IsCitusTable(resultRelationId)) + { + partitionColumn = PartitionColumn(resultRelationId, rangeTableId); + } + + bool hasVarArgument = false; /* A STABLE function is passed a Var argument */ + bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */ + ListCell *targetEntryCell = NULL; + + foreach(targetEntryCell, targetList) + { + TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); + + /* skip resjunk entries: UPDATE adds some for ctid, etc. */ + if (targetEntry->resjunk) + { + continue; + } + + bool targetEntryPartitionColumn = false; + AttrNumber targetColumnAttrNumber = InvalidAttrNumber; + + /* reference tables do not have partition column */ + if (partitionColumn == NULL) + { + targetEntryPartitionColumn = false; + } + else + { + if (commandType == CMD_UPDATE) + { + /* + * Note that it is not possible to give an alias to + * UPDATE table SET ... + */ + if (targetEntry->resname) + { + targetColumnAttrNumber = get_attnum(resultRelationId, + targetEntry->resname); + if (targetColumnAttrNumber == partitionColumn->varattno) + { + targetEntryPartitionColumn = true; + } + } + } + } + + + if (commandType == CMD_UPDATE && + FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + CitusIsVolatileFunction)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "functions used in UPDATE queries on distributed " + "tables must not be VOLATILE", + NULL, NULL); + } + + if (commandType == CMD_UPDATE && targetEntryPartitionColumn && + TargetEntryChangesValue(targetEntry, partitionColumn, + joinTree)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "modifying the partition value of rows is not " + "allowed", + NULL, NULL); + } + + if (commandType == CMD_UPDATE && + MasterIrreducibleExpression((Node *) targetEntry->expr, + &hasVarArgument, &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + + if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, + NodeIsFieldStore)) + { + /* DELETE cannot do field indirection already */ + Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "inserting or modifying composite type fields is not " + "supported", NULL, + "Use the column name to insert or update the composite " + "type as a single value"); + } + } + + if (joinTree != NULL) + { + if (FindNodeMatchingCheckFunction((Node *) quals, + CitusIsVolatileFunction)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "functions used in the WHERE/ON/WHEN clause of modification " + "queries on distributed tables must not be VOLATILE", + NULL, NULL); + } + else if (MasterIrreducibleExpression(quals, &hasVarArgument, + &hasBadCoalesce)) + { + Assert(hasVarArgument || hasBadCoalesce); + } + } + + if (hasVarArgument) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "STABLE functions used in UPDATE queries " + "cannot be called with column references", + NULL, NULL); + } + + if (hasBadCoalesce) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not allowed in CASE or " + "COALESCE statements", + NULL, NULL); + } + + if (contain_mutable_functions((Node *) returningList)) + { + return 
DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "non-IMMUTABLE functions are not allowed in the " + "RETURNING clause", + NULL, NULL); + } + + if (quals != NULL && + nodeTag(quals) == T_CurrentOfExpr) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot run DML queries with cursors", NULL, + NULL); + } + + return NULL; +} + + /* * ModifyPartialQuerySupported implements a subset of what ModifyQuerySupported checks, * that subset being what's necessary to check modifying CTEs for. @@ -620,148 +768,21 @@ ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery, Oid resultRelationId = ModifyQueryResultRelationId(queryTree); *distributedTableIdOutput = resultRelationId; - uint32 rangeTableId = 1; - Var *partitionColumn = NULL; - if (IsCitusTable(resultRelationId)) - { - partitionColumn = PartitionColumn(resultRelationId, rangeTableId); - } commandType = queryTree->commandType; if (commandType == CMD_INSERT || commandType == CMD_UPDATE || commandType == CMD_DELETE) { - bool hasVarArgument = false; /* A STABLE function is passed a Var argument */ - bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */ - FromExpr *joinTree = queryTree->jointree; - ListCell *targetEntryCell = NULL; - - foreach(targetEntryCell, queryTree->targetList) + deferredError = + TargetlistAndFunctionsSupported(resultRelationId, + queryTree->jointree, + queryTree->jointree->quals, + queryTree->targetList, + commandType, + queryTree->returningList); + if (deferredError) { - TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); - - /* skip resjunk entries: UPDATE adds some for ctid, etc. */ - if (targetEntry->resjunk) - { - continue; - } - - bool targetEntryPartitionColumn = false; - AttrNumber targetColumnAttrNumber = InvalidAttrNumber; - - /* reference tables do not have partition column */ - if (partitionColumn == NULL) - { - targetEntryPartitionColumn = false; - } - else - { - if (commandType == CMD_UPDATE) - { - /* - * Note that it is not possible to give an alias to - * UPDATE table SET ... 
- */ - if (targetEntry->resname) - { - targetColumnAttrNumber = get_attnum(resultRelationId, - targetEntry->resname); - if (targetColumnAttrNumber == partitionColumn->varattno) - { - targetEntryPartitionColumn = true; - } - } - } - } - - - if (commandType == CMD_UPDATE && - FindNodeMatchingCheckFunction((Node *) targetEntry->expr, - CitusIsVolatileFunction)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "functions used in UPDATE queries on distributed " - "tables must not be VOLATILE", - NULL, NULL); - } - - if (commandType == CMD_UPDATE && targetEntryPartitionColumn && - TargetEntryChangesValue(targetEntry, partitionColumn, - queryTree->jointree)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "modifying the partition value of rows is not " - "allowed", - NULL, NULL); - } - - if (commandType == CMD_UPDATE && - MasterIrreducibleExpression((Node *) targetEntry->expr, - &hasVarArgument, &hasBadCoalesce)) - { - Assert(hasVarArgument || hasBadCoalesce); - } - - if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr, - NodeIsFieldStore)) - { - /* DELETE cannot do field indirection already */ - Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT); - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "inserting or modifying composite type fields is not " - "supported", NULL, - "Use the column name to insert or update the composite " - "type as a single value"); - } - } - - if (joinTree != NULL) - { - if (FindNodeMatchingCheckFunction((Node *) joinTree->quals, - CitusIsVolatileFunction)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "functions used in the WHERE clause of modification " - "queries on distributed tables must not be VOLATILE", - NULL, NULL); - } - else if (MasterIrreducibleExpression(joinTree->quals, &hasVarArgument, - &hasBadCoalesce)) - { - Assert(hasVarArgument || hasBadCoalesce); - } - } - - if (hasVarArgument) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "STABLE functions used in UPDATE queries " - "cannot be called with column references", - NULL, NULL); - } - - if (hasBadCoalesce) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "non-IMMUTABLE functions are not allowed in CASE or " - "COALESCE statements", - NULL, NULL); - } - - if (contain_mutable_functions((Node *) queryTree->returningList)) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "non-IMMUTABLE functions are not allowed in the " - "RETURNING clause", - NULL, NULL); - } - - if (queryTree->jointree->quals != NULL && - nodeTag(queryTree->jointree->quals) == T_CurrentOfExpr) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot run DML queries with cursors", NULL, - NULL); + return deferredError; } } @@ -866,7 +887,7 @@ IsLocallyAccessibleCitusLocalTable(Oid relationId) /* * NodeIsFieldStore returns true if given Node is a FieldStore object. 
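Both the checks being removed here and their new home, TargetlistAndFunctionsSupported, follow the planner's deferred-error convention: validation returns a DeferredErrorMessage instead of throwing, and each caller decides whether to raise it on the spot (as MergeQuerySupported does with RaiseDeferredError) or to pass it back up so planning can fall through to another strategy (as ModifyPartialQuerySupported does by returning it). A self-contained sketch of that shape with simplified stand-in types (the real DeferredError also carries an error code, detail, and hint):

#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for Citus' DeferredErrorMessage */
typedef struct DeferredErrorMessage
{
	const char *message;
} DeferredErrorMessage;

static DeferredErrorMessage *
DeferredError(const char *message)
{
	DeferredErrorMessage *error = malloc(sizeof(DeferredErrorMessage));
	error->message = message;
	return error;
}

/* in the planner the ERROR level aborts the statement; here we just exit */
static void
RaiseDeferredError(DeferredErrorMessage *error)
{
	fprintf(stderr, "ERROR: %s\n", error->message);
	exit(1);
}

/* a validation helper: returns NULL when the input is acceptable */
static DeferredErrorMessage *
CheckSomething(int usesVolatileFunction)
{
	if (usesVolatileFunction)
	{
		return DeferredError("functions used in modification queries on "
							 "distributed tables must not be VOLATILE");
	}
	return NULL;
}

int
main(void)
{
	/* caller A: treat any problem as fatal right away */
	DeferredErrorMessage *error = CheckSomething(0);
	if (error != NULL)
	{
		RaiseDeferredError(error);
	}

	/* caller B: record the error and let a later stage decide what to do */
	error = CheckSomething(1);
	if (error != NULL)
	{
		printf("planning error recorded, not raised yet: %s\n", error->message);
	}
	return 0;
}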
*/ -static bool +bool NodeIsFieldStore(Node *node) { return node && IsA(node, FieldStore); @@ -888,8 +909,10 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer PlannerRestrictionContext *plannerRestrictionContext) { Oid distributedTableId = InvalidOid; - DeferredErrorMessage *error = ModifyPartialQuerySupported(queryTree, multiShardQuery, - &distributedTableId); + + DeferredErrorMessage *error = + ModifyPartialQuerySupported(queryTree, multiShardQuery, + &distributedTableId); if (error) { return error; @@ -954,17 +977,10 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer } else if (rangeTableEntry->relkind == RELKIND_MATVIEW) { - if (IsMergeAllowedOnRelation(originalQuery, rangeTableEntry)) - { - continue; - } - else - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "materialized views in " - "modify queries are not supported", - NULL, NULL); - } + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "materialized views in " + "modify queries are not supported", + NULL, NULL); } /* for other kinds of relations, check if it's distributed */ else @@ -1065,7 +1081,7 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer if (multiShardQuery) { - errorMessage = MultiShardUpdateDeleteMergeSupported( + errorMessage = MultiShardUpdateDeleteSupported( originalQuery, plannerRestrictionContext); } @@ -1246,12 +1262,12 @@ ErrorIfOnConflictNotSupported(Query *queryTree) /* - * MultiShardUpdateDeleteMergeSupported returns the error message if the update/delete is + * MultiShardUpdateDeleteSupported returns the error message if the update/delete is * not pushdownable, otherwise it returns NULL. */ static DeferredErrorMessage * -MultiShardUpdateDeleteMergeSupported(Query *originalQuery, - PlannerRestrictionContext *plannerRestrictionContext) +MultiShardUpdateDeleteSupported(Query *originalQuery, + PlannerRestrictionContext *plannerRestrictionContext) { DeferredErrorMessage *errorMessage = NULL; RangeTblEntry *resultRangeTable = ExtractResultRelationRTE(originalQuery); @@ -1282,8 +1298,9 @@ MultiShardUpdateDeleteMergeSupported(Query *originalQuery, } else { - errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(originalQuery, - plannerRestrictionContext); + errorMessage = DeferErrorIfUnsupportedSubqueryPushdown( + originalQuery, + plannerRestrictionContext); } return errorMessage; @@ -1323,7 +1340,7 @@ SingleShardUpdateDeleteSupported(Query *originalQuery, * HasDangerousJoinUsing search jointree for unnamed JOIN USING. Check the * implementation of has_dangerous_join_using in ruleutils. */ -static bool +bool HasDangerousJoinUsing(List *rtableList, Node *joinTreeNode) { if (IsA(joinTreeNode, RangeTblRef)) @@ -1427,7 +1444,7 @@ IsMergeQuery(Query *query) * which do, but for now we just error out. That makes both the code and user-education * easier. */ -static bool +bool MasterIrreducibleExpression(Node *expression, bool *varArgument, bool *badCoalesce) { WalkerState data; @@ -1575,7 +1592,7 @@ MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context) * expression is a value that is implied by the qualifiers of the join * tree, or the target entry sets a different column. */ -static bool +bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, FromExpr *joinTree) { bool isColumnValueChanged = true; @@ -1796,7 +1813,7 @@ ExtractFirstCitusTableId(Query *query) * RouterJob builds a Job to represent a single shard select/update/delete and * multiple shard update/delete queries. 
*/ -static Job * +Job * RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionContext, DeferredErrorMessage **planningError) { @@ -1846,8 +1863,8 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon if (*planningError) { /* - * For MERGE, we do _not_ plan anything other than Router job, let's - * not continue further down the lane in distributed planning, simply + * For MERGE, we do _not_ plan any other router job than the MERGE job itself, + * let's not continue further down the lane in distributed planning, simply * bail out. */ if (IsMergeQuery(originalQuery)) @@ -2320,9 +2337,20 @@ PlanRouterQuery(Query *originalQuery, } Assert(UpdateOrDeleteOrMergeQuery(originalQuery)); - planningError = ModifyQuerySupported(originalQuery, originalQuery, - isMultiShardQuery, - plannerRestrictionContext); + + if (IsMergeQuery(originalQuery)) + { + planningError = MergeQuerySupported(originalQuery, + isMultiShardQuery, + plannerRestrictionContext); + } + else + { + planningError = ModifyQuerySupported(originalQuery, originalQuery, + isMultiShardQuery, + plannerRestrictionContext); + } + if (planningError != NULL) { return planningError; @@ -2643,7 +2671,7 @@ TargetShardIntervalForFastPathQuery(Query *query, bool *isMultiShardQuery, { Oid relationId = ExtractFirstCitusTableId(query); - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(relationId)) { /* we don't need to do shard pruning for non-distributed tables */ return list_make1(LoadShardIntervalList(relationId)); @@ -2936,7 +2964,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError) Assert(query->commandType == CMD_INSERT); /* reference tables and citus local tables can only have one shard */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { List *shardIntervalList = LoadShardIntervalList(distributedTableId); @@ -3477,7 +3505,7 @@ ExtractInsertPartitionKeyValue(Query *query) uint32 rangeTableId = 1; Const *singlePartitionValueConst = NULL; - if (IsCitusTableType(distributedTableId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKey(distributedTableId)) { return NULL; } @@ -3589,6 +3617,8 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query) bool hasDistributedTable = false; bool hasReferenceTable = false; + List *distributedRelationList = NIL; + ExtractRangeTableRelationWalker((Node *) query, &rangeTableRelationList); foreach(rangeTableRelationCell, rangeTableRelationList) { @@ -3626,6 +3656,8 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query) if (IsCitusTableType(distributedTableId, DISTRIBUTED_TABLE)) { hasDistributedTable = true; + distributedRelationList = lappend_oid(distributedRelationList, + distributedTableId); } /* @@ -3680,6 +3712,15 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query) NULL, NULL); } + if (!EnableNonColocatedRouterQueryPushdown && + !AllDistributedRelationsInListColocated(distributedRelationList)) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "router planner does not support queries that " + "reference non-colocated distributed tables", + NULL, NULL); + } + #if PG_VERSION_NUM >= PG_VERSION_14 DeferredErrorMessage *CTEWithSearchClauseError = ErrorIfQueryHasCTEWithSearchClause(query); @@ -3797,8 +3838,7 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree) CitusTableCacheEntry *modificationTableCacheEntry = GetCitusTableCacheEntry(distributedTableId); - if 
(IsCitusTableTypeCacheEntry(modificationTableCacheEntry, - CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(modificationTableCacheEntry)) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, "cannot router plan modification of a non-distributed table", diff --git a/src/backend/distributed/planner/query_colocation_checker.c b/src/backend/distributed/planner/query_colocation_checker.c index b7cc41068..c5de0ef9e 100644 --- a/src/backend/distributed/planner/query_colocation_checker.c +++ b/src/backend/distributed/planner/query_colocation_checker.c @@ -168,7 +168,7 @@ AnchorRte(Query *subquery) { Oid relationId = currentRte->relid; - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && !HasDistributionKey(relationId)) { /* * Non-distributed tables should not be the anchor rte since they diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 5cae19497..cbe6a3606 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -591,10 +591,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, } else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) { + StringInfo errorMessage = makeStringInfo(); + bool isMergeCmd = IsMergeQuery(originalQuery); + appendStringInfo(errorMessage, + "%s" + "only supported when all distributed tables are " + "co-located and joined on their distribution columns", + isMergeCmd ? "MERGE command is " : "complex joins are "); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "complex joins are only supported when all distributed tables are " - "co-located and joined on their distribution columns", - NULL, NULL); + errorMessage->data, NULL, NULL); } /* we shouldn't allow reference tables in the FROM clause when the query has sublinks */ diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 713f1f4f2..d39c4affb 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -151,8 +151,9 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass secondClass); static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex, Index *partitionKeyIndex); -static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext * - restrictionContext); +static bool AllDistributedRelationsInRestrictionContextColocated( + RelationRestrictionContext * + restrictionContext); static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node); static JoinRestrictionContext * FilterJoinRestrictionContext( JoinRestrictionContext *joinRestrictionContext, Relids @@ -383,7 +384,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery, return false; } - if (!AllRelationsInRestrictionContextColocated(restrictionContext)) + if (!AllDistributedRelationsInRestrictionContextColocated(restrictionContext)) { /* distribution columns are equal, but tables are not co-located */ return false; @@ -703,8 +704,8 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList, int rteIdentity = GetRTEIdentity(relationRestriction->rte); /* we shouldn't check for the equality of non-distributed tables */ - if (IsCitusTableType(relationRestriction->relationId, - CITUS_TABLE_WITH_NO_DIST_KEY)) + if 
(IsCitusTable(relationRestriction->relationId) && + !HasDistributionKey(relationRestriction->relationId)) { continue; } @@ -1919,22 +1920,66 @@ FindQueryContainingRTEIdentityInternal(Node *node, /* - * AllRelationsInRestrictionContextColocated determines whether all of the relations in the - * given relation restrictions list are co-located. + * AllDistributedRelationsInRestrictionContextColocated determines whether all of the + * distributed relations in the given relation restrictions list are co-located. */ static bool -AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext) +AllDistributedRelationsInRestrictionContextColocated( + RelationRestrictionContext *restrictionContext) { RelationRestriction *relationRestriction = NULL; - int initialColocationId = INVALID_COLOCATION_ID; + List *relationIdList = NIL; /* check whether all relations exists in the main restriction list */ foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList) { - Oid relationId = relationRestriction->relationId; + relationIdList = lappend_oid(relationIdList, relationRestriction->relationId); + } - if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + return AllDistributedRelationsInListColocated(relationIdList); +} + + +/* + * AllDistributedRelationsInRTEListColocated determines whether all of the + * distributed relations in the given RangeTableEntry list are co-located. + */ +bool +AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList) +{ + RangeTblEntry *rangeTableEntry = NULL; + List *relationIdList = NIL; + + foreach_ptr(rangeTableEntry, rangeTableEntryList) + { + relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid); + } + + return AllDistributedRelationsInListColocated(relationIdList); +} + + +/* + * AllDistributedRelationsInListColocated determines whether all of the + * distributed relations in the given list are co-located. + */ +bool +AllDistributedRelationsInListColocated(List *relationList) +{ + int initialColocationId = INVALID_COLOCATION_ID; + Oid relationId = InvalidOid; + + foreach_oid(relationId, relationList) + { + if (!IsCitusTable(relationId)) { + /* not interested in Postgres tables */ + continue; + } + + if (!IsCitusTableType(relationId, DISTRIBUTED_TABLE)) + { + /* not interested in non-distributed tables */ continue; } diff --git a/src/backend/distributed/planner/shard_pruning.c b/src/backend/distributed/planner/shard_pruning.c index 665c9a75b..5375a70fa 100644 --- a/src/backend/distributed/planner/shard_pruning.c +++ b/src/backend/distributed/planner/shard_pruning.c @@ -333,7 +333,7 @@ PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList, } /* short circuit for non-distributed tables such as reference table */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { prunedList = ShardArrayToList(cacheEntry->sortedShardIntervalArray, cacheEntry->shardIntervalArrayLength); diff --git a/src/backend/distributed/replication/multi_logical_replication.c b/src/backend/distributed/replication/multi_logical_replication.c index 86b40bfba..e51329f22 100644 --- a/src/backend/distributed/replication/multi_logical_replication.c +++ b/src/backend/distributed/replication/multi_logical_replication.c @@ -88,6 +88,8 @@ static const char *replicationSlotPrefix[] = { * IMPORTANT: All the subscription names should start with "citus_". 
Otherwise * our utility hook does not defend against non-superusers altering or dropping * them, which is important for security purposes. + * + * We should also keep these in sync with IsCitusShardTransferBackend(). */ static const char *subscriptionPrefix[] = { [SHARD_MOVE] = "citus_shard_move_subscription_", @@ -1338,7 +1340,9 @@ CreatePublications(MultiConnection *connection, worker->groupId, CLEANUP_ALWAYS); + ExecuteCriticalRemoteCommand(connection, DISABLE_DDL_PROPAGATION); ExecuteCriticalRemoteCommand(connection, createPublicationCommand->data); + ExecuteCriticalRemoteCommand(connection, ENABLE_DDL_PROPAGATION); pfree(createPublicationCommand->data); pfree(createPublicationCommand); } diff --git a/src/backend/distributed/shardsplit/shardsplit_decoder.c b/src/backend/distributed/shardsplit/shardsplit_decoder.c index 51a56b36e..1386a21b0 100644 --- a/src/backend/distributed/shardsplit/shardsplit_decoder.c +++ b/src/backend/distributed/shardsplit/shardsplit_decoder.c @@ -10,19 +10,28 @@ #include "postgres.h" #include "distributed/shardinterval_utils.h" #include "distributed/shardsplit_shared_memory.h" +#include "distributed/worker_shard_visibility.h" +#include "distributed/worker_protocol.h" #include "distributed/listutils.h" +#include "distributed/metadata/distobject.h" #include "replication/logical.h" #include "utils/typcache.h" - +#include "utils/lsyscache.h" +#include "catalog/pg_namespace.h" extern void _PG_output_plugin_init(OutputPluginCallbacks *cb); -static LogicalDecodeChangeCB pgoutputChangeCB; +static LogicalDecodeChangeCB pgOutputPluginChangeCB; + +#define InvalidRepOriginId 0 static HTAB *SourceToDestinationShardMap = NULL; +static bool replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId + origin_id); /* Plugin callback */ -static void split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, - Relation relation, ReorderBufferChange *change); +static void shard_split_change_cb(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change); /* Helper methods */ static int32_t GetHashValueForIncomingTuple(Relation sourceShardRelation, @@ -47,9 +56,10 @@ void _PG_output_plugin_init(OutputPluginCallbacks *cb) { LogicalOutputPluginInit plugin_init = - (LogicalOutputPluginInit) (void *) load_external_function("pgoutput", - "_PG_output_plugin_init", - false, NULL); + (LogicalOutputPluginInit) (void *) + load_external_function("pgoutput", + "_PG_output_plugin_init", + false, NULL); if (plugin_init == NULL) { @@ -60,25 +70,56 @@ _PG_output_plugin_init(OutputPluginCallbacks *cb) plugin_init(cb); /* actual pgoutput callback will be called with the appropriate destination shard */ - pgoutputChangeCB = cb->change_cb; - cb->change_cb = split_change_cb; + pgOutputPluginChangeCB = cb->change_cb; + cb->change_cb = shard_split_change_cb; + cb->filter_by_origin_cb = replication_origin_filter_cb; } /* - * split_change function emits the incoming tuple change + * replication_origin_filter_cb call back function filters out publication of changes + * originated from any other node other than the current node. This is + * identified by the "origin_id" of the changes. The origin_id is set to + * a non-zero value in the origin node as part of WAL replication for internal + * operations like shard split/moves/create_distributed_table etc. 
+ */ +static bool +replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId origin_id) +{ + return (origin_id != InvalidRepOriginId); +} + + +/* + * shard_split_change_cb function emits the incoming tuple change * to the appropriate destination shard. */ static void -split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, - Relation relation, ReorderBufferChange *change) +shard_split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change) { + /* + * If Citus has not been loaded yet, pass the changes + * through to the undrelying decoder plugin. + */ + if (!CitusHasBeenLoaded()) + { + pgOutputPluginChangeCB(ctx, txn, relation, change); + return; + } + + /* check if the relation is publishable.*/ if (!is_publishable_relation(relation)) { return; } char *replicationSlotName = ctx->slot->data.name.data; + if (replicationSlotName == NULL) + { + elog(ERROR, "Replication slot name is NULL!"); + return; + } /* * Initialize SourceToDestinationShardMap if not already initialized. @@ -198,7 +239,7 @@ split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, } } - pgoutputChangeCB(ctx, txn, targetRelation, change); + pgOutputPluginChangeCB(ctx, txn, targetRelation, change); RelationClose(targetRelation); } diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 3c67d9b78..c779b0a8d 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -74,6 +74,7 @@ #include "distributed/recursive_planning.h" #include "distributed/reference_table_utils.h" #include "distributed/relation_access_tracking.h" +#include "distributed/replication_origin_session_utils.h" #include "distributed/run_from_same_connection.h" #include "distributed/shard_cleaner.h" #include "distributed/shard_transfer.h" @@ -135,6 +136,8 @@ ReadColumnarOptions_type extern_ReadColumnarOptions = NULL; CppConcat(extern_, funcname) = \ (typename) (void *) lookup_external_function(handle, # funcname) +#define CDC_DECODER_DYNAMIC_LIB_PATH "$libdir/citus_decoders:$libdir" + DEFINE_COLUMNAR_PASSTHROUGH_FUNC(columnar_handler) DEFINE_COLUMNAR_PASSTHROUGH_FUNC(alter_columnar_table_set) DEFINE_COLUMNAR_PASSTHROUGH_FUNC(alter_columnar_table_reset) @@ -206,7 +209,7 @@ static bool StatisticsCollectionGucCheckHook(bool *newval, void **extra, GucSour source); static void CitusAuthHook(Port *port, int status); static bool IsSuperuser(char *userName); - +static void AdjustDynamicLibraryPathForCdcDecoders(void); static ClientAuthentication_hook_type original_client_auth_hook = NULL; @@ -359,6 +362,11 @@ static const struct config_enum_entry cpu_priority_options[] = { { NULL, 0, false} }; +static const struct config_enum_entry metadata_sync_mode_options[] = { + { "transactional", METADATA_SYNC_TRANSACTIONAL, false }, + { "nontransactional", METADATA_SYNC_NON_TRANSACTIONAL, false }, + { NULL, 0, false } +}; /* *INDENT-ON* */ @@ -469,6 +477,17 @@ _PG_init(void) InitializeLocallyReservedSharedConnections(); InitializeClusterClockMem(); + /* + * Adjust the Dynamic Library Path to prepend citus_decodes to the dynamic + * library path. This is needed to make sure that the citus decoders are + * loaded before the default decoders for CDC. 
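AdjustDynamicLibraryPathForCdcDecoders above only rewrites dynamic_library_path when it still has the stock value "$libdir", so a path customized by the administrator is left alone; the replacement value simply puts $libdir/citus_decoders ahead of $libdir. A self-contained sketch of that guard, reduced to string handling (the real code applies the change through SetConfigOption with PGC_POSTMASTER / PGC_S_OVERRIDE):

#include <stdio.h>
#include <string.h>

#define DEFAULT_LIBRARY_PATH "$libdir"
#define CDC_DECODER_DYNAMIC_LIB_PATH "$libdir/citus_decoders:$libdir"

/*
 * Returns the value to use for dynamic_library_path: the decoder directory
 * is prepended only when the current setting is still the stock default,
 * so an administrator's customized path is never overwritten.
 */
static const char *
AdjustedLibraryPath(const char *currentPath)
{
	if (strcmp(currentPath, DEFAULT_LIBRARY_PATH) == 0)
	{
		return CDC_DECODER_DYNAMIC_LIB_PATH;
	}
	return currentPath;
}

int
main(void)
{
	printf("%s\n", AdjustedLibraryPath("$libdir"));             /* overridden */
	printf("%s\n", AdjustedLibraryPath("/opt/mylibs:$libdir")); /* left alone */
	return 0;
}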
+ */ + if (EnableChangeDataCapture) + { + AdjustDynamicLibraryPathForCdcDecoders(); + } + + /* initialize shard split shared memory handle management */ InitializeShardSplitSMHandleManagement(); @@ -536,6 +555,22 @@ } +/* + * AdjustDynamicLibraryPathForCdcDecoders prepends $libdir/citus_decoders + * to the dynamic library path. This is needed to make sure that the citus + * decoders are loaded before the default decoders for CDC. + */ +static void +AdjustDynamicLibraryPathForCdcDecoders(void) +{ + if (strcmp(Dynamic_library_path, "$libdir") == 0) + { + SetConfigOption("dynamic_library_path", CDC_DECODER_DYNAMIC_LIB_PATH, + PGC_POSTMASTER, PGC_S_OVERRIDE); + } +} + + #if PG_VERSION_NUM >= PG_VERSION_15 /* @@ -1132,6 +1167,16 @@ RegisterCitusConfigVariables(void) GUC_STANDARD, NULL, NULL, NULL); + DefineCustomBoolVariable( + "citus.enable_change_data_capture", + gettext_noop("Enables using replication origin tracking for change data capture"), + NULL, + &EnableChangeDataCapture, + false, + PGC_USERSET, + GUC_STANDARD, + NULL, NULL, NULL); + DefineCustomBoolVariable( "citus.enable_cluster_clock", gettext_noop("When users explicitly call UDF citus_get_transaction_clock() " @@ -1268,6 +1313,26 @@ RegisterCitusConfigVariables(void) GUC_NO_SHOW_ALL, NULL, NULL, NULL); + DefineCustomBoolVariable( + "citus.enable_non_colocated_router_query_pushdown", + gettext_noop("Enables router planner for the queries that reference " + "non-colocated distributed tables."), + gettext_noop("Normally, the router planner is only enabled for " + "the queries that reference colocated distributed tables " + "because it is not guaranteed to have the target shards " + "always on the same node, e.g., after rebalancing the " + "shards. For this reason, while enabling this flag allows " + "some degree of optimization for the queries that reference " + "non-colocated distributed tables, it is not guaranteed " + "that the same query will work after rebalancing the shards " + "or altering the shard count of one of those distributed " + "tables."), + &EnableNonColocatedRouterQueryPushdown, + true, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomBoolVariable( "citus.enable_repartition_joins", gettext_noop("Allows Citus to repartition data between nodes."), @@ -1849,6 +1914,21 @@ RegisterCitusConfigVariables(void) GUC_UNIT_MS | GUC_NO_SHOW_ALL, NULL, NULL, NULL); + DefineCustomEnumVariable( + "citus.metadata_sync_mode", + gettext_noop("Sets transaction mode for metadata syncs."), + gettext_noop("Metadata sync can be run inside a single coordinated " + "transaction or as multiple small transactions in an " + "idempotent way. By default, metadata is synced in a single "
+ "coordinated transaction. If the workers run into memory " + "problems, the alternative nontransactional mode can be " + "used, which sends each command in a separate transaction."), + &MetadataSyncTransMode, + METADATA_SYNC_TRANSACTIONAL, metadata_sync_mode_options, + PGC_SUSET, + GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomIntVariable( "citus.metadata_sync_retry_interval", gettext_noop("Sets the interval to retry failed metadata syncs."), @@ -2406,7 +2486,6 @@ RegisterCitusConfigVariables(void) GUC_STANDARD, NULL, NULL, NULL); - /* warn about config items in the citus namespace that are not registered above */ EmitWarningsOnPlaceholders("citus"); diff --git a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql index 981c5f375..bbaf0ce4d 100644 --- a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql +++ b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql @@ -1,4 +1,12 @@ -- citus--11.2-1--11.3-1 +#include "udfs/repl_origin_helper/11.3-1.sql" +#include "udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql" +ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY USING INDEX pg_dist_authinfo_identification_index; +ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY USING INDEX pg_dist_partition_logical_relid_index; +ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY USING INDEX pg_dist_placement_placementid_index; +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY USING INDEX pg_dist_rebalance_strategy_name_key; +ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY USING INDEX pg_dist_shard_shardid_index; +ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY USING INDEX pg_dist_transaction_unique_constraint; --- bump version to 11.3-1 - +#include "udfs/worker_drop_all_shell_tables/11.3-1.sql" +#include "udfs/citus_internal_mark_node_not_synced/11.3-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql index 7d71235d7..322613e5f 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql @@ -1,2 +1,22 @@ -- citus--11.3-1--11.2-1 --- this is an empty downgrade path since citus--11.2-1--11.3-1.sql is empty for now + +DROP FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking(); +DROP FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking(); +DROP FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active(); +DROP FUNCTION IF EXISTS pg_catalog.worker_adjust_identity_column_seq_ranges(regclass); +ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING; + +ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING; +ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING; + +DROP PROCEDURE
pg_catalog.worker_drop_all_shell_tables(bool); +DROP FUNCTION pg_catalog.citus_internal_mark_node_not_synced(int, int); diff --git a/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/11.3-1.sql b/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/11.3-1.sql new file mode 100644 index 000000000..0d90c8f1a --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/11.3-1.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_mark_node_not_synced(parent_pid int, nodeid int) + RETURNS VOID + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$citus_internal_mark_node_not_synced$$; +COMMENT ON FUNCTION citus_internal_mark_node_not_synced(int, int) + IS 'marks given node not synced by unsetting metadatasynced column at the start of the nontransactional sync.'; diff --git a/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/latest.sql b/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/latest.sql new file mode 100644 index 000000000..0d90c8f1a --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/latest.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_mark_node_not_synced(parent_pid int, nodeid int) + RETURNS VOID + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$citus_internal_mark_node_not_synced$$; +COMMENT ON FUNCTION citus_internal_mark_node_not_synced(int, int) + IS 'marks given node not synced by unsetting metadatasynced column at the start of the nontransactional sync.'; diff --git a/src/backend/distributed/sql/udfs/repl_origin_helper/11.3-1.sql b/src/backend/distributed/sql/udfs/repl_origin_helper/11.3-1.sql new file mode 100644 index 000000000..5fe5a3bb9 --- /dev/null +++ b/src/backend/distributed/sql/udfs/repl_origin_helper/11.3-1.sql @@ -0,0 +1,20 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking() +RETURNS void +LANGUAGE C STRICT +AS 'MODULE_PATHNAME', $$citus_internal_start_replication_origin_tracking$$; +COMMENT ON FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking() + IS 'To start replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC'; + +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking() +RETURNS void +LANGUAGE C STRICT +AS 'MODULE_PATHNAME', $$citus_internal_stop_replication_origin_tracking$$; +COMMENT ON FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking() + IS 'To stop replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC'; + +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active() +RETURNS boolean +LANGUAGE C STRICT +AS 'MODULE_PATHNAME', $$citus_internal_is_replication_origin_tracking_active$$; +COMMENT ON FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active() + IS 'To check if replication origin tracking is active for skipping publishing of duplicated events during internal data movements for CDC'; diff --git a/src/backend/distributed/sql/udfs/repl_origin_helper/latest.sql b/src/backend/distributed/sql/udfs/repl_origin_helper/latest.sql new file mode 100644 index 000000000..5fe5a3bb9 --- /dev/null +++ b/src/backend/distributed/sql/udfs/repl_origin_helper/latest.sql @@ -0,0 +1,20 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking() +RETURNS void +LANGUAGE C STRICT +AS 'MODULE_PATHNAME', 
$$citus_internal_start_replication_origin_tracking$$; +COMMENT ON FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking() + IS 'To start replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC'; + +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking() +RETURNS void +LANGUAGE C STRICT +AS 'MODULE_PATHNAME', $$citus_internal_stop_replication_origin_tracking$$; +COMMENT ON FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking() + IS 'To stop replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC'; + +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active() +RETURNS boolean +LANGUAGE C STRICT +AS 'MODULE_PATHNAME', $$citus_internal_is_replication_origin_tracking_active$$; +COMMENT ON FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active() + IS 'To check if replication origin tracking is active for skipping publishing of duplicated events during internal data movements for CDC'; diff --git a/src/backend/distributed/sql/udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql b/src/backend/distributed/sql/udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql new file mode 100644 index 000000000..aecc43704 --- /dev/null +++ b/src/backend/distributed/sql/udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass) + RETURNS VOID + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$worker_adjust_identity_column_seq_ranges$$; +COMMENT ON FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass) + IS 'modify identity column seq ranges to produce globally unique values'; + diff --git a/src/backend/distributed/sql/udfs/worker_adjust_identity_column_seq_ranges/latest.sql b/src/backend/distributed/sql/udfs/worker_adjust_identity_column_seq_ranges/latest.sql new file mode 100644 index 000000000..aecc43704 --- /dev/null +++ b/src/backend/distributed/sql/udfs/worker_adjust_identity_column_seq_ranges/latest.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass) + RETURNS VOID + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$worker_adjust_identity_column_seq_ranges$$; +COMMENT ON FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass) + IS 'modify identity column seq ranges to produce globally unique values'; + diff --git a/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/11.3-1.sql b/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/11.3-1.sql new file mode 100644 index 000000000..55236286c --- /dev/null +++ b/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/11.3-1.sql @@ -0,0 +1,23 @@ + -- During metadata sync, when we send many ddls over single transaction, worker node can error due +-- to reaching at max allocation block size for invalidation messages. To find a workaround for the problem, +-- we added nontransactional metadata sync mode where we create many transaction while dropping shell tables +-- via https://github.com/citusdata/citus/pull/6728. 
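+--
+-- A hedged usage sketch (not part of this migration): on a worker node, drop
+-- the shell tables in many small transactions to keep the invalidation
+-- message queue small. Because the procedure may COMMIT internally, it should
+-- be called outside an explicit transaction block:
+--
+--   CALL pg_catalog.worker_drop_all_shell_tables(singleTransaction := false);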
+CREATE OR REPLACE PROCEDURE pg_catalog.worker_drop_all_shell_tables(singleTransaction bool DEFAULT true) +LANGUAGE plpgsql +AS $$ +DECLARE + table_name text; +BEGIN + -- drop shell tables within single or multiple transactions according to the flag singleTransaction + FOR table_name IN SELECT logicalrelid::regclass::text FROM pg_dist_partition + LOOP + PERFORM pg_catalog.worker_drop_shell_table(table_name); + IF not singleTransaction THEN + COMMIT; + END IF; + END LOOP; +END; +$$; +COMMENT ON PROCEDURE worker_drop_all_shell_tables(singleTransaction bool) + IS 'drop all distributed tables only without the metadata within single transaction or ' + 'multiple transaction specified by the flag singleTransaction'; diff --git a/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/latest.sql b/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/latest.sql new file mode 100644 index 000000000..55236286c --- /dev/null +++ b/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/latest.sql @@ -0,0 +1,23 @@ + -- During metadata sync, when we send many ddls over single transaction, worker node can error due +-- to reaching at max allocation block size for invalidation messages. To find a workaround for the problem, +-- we added nontransactional metadata sync mode where we create many transaction while dropping shell tables +-- via https://github.com/citusdata/citus/pull/6728. +CREATE OR REPLACE PROCEDURE pg_catalog.worker_drop_all_shell_tables(singleTransaction bool DEFAULT true) +LANGUAGE plpgsql +AS $$ +DECLARE + table_name text; +BEGIN + -- drop shell tables within single or multiple transactions according to the flag singleTransaction + FOR table_name IN SELECT logicalrelid::regclass::text FROM pg_dist_partition + LOOP + PERFORM pg_catalog.worker_drop_shell_table(table_name); + IF not singleTransaction THEN + COMMIT; + END IF; + END LOOP; +END; +$$; +COMMENT ON PROCEDURE worker_drop_all_shell_tables(singleTransaction bool) + IS 'drop all distributed tables only without the metadata within single transaction or ' + 'multiple transaction specified by the flag singleTransaction'; diff --git a/src/backend/distributed/test/metadata_sync.c b/src/backend/distributed/test/metadata_sync.c index b1c8a095c..46d2303d6 100644 --- a/src/backend/distributed/test/metadata_sync.c +++ b/src/backend/distributed/test/metadata_sync.c @@ -49,26 +49,23 @@ activate_node_snapshot(PG_FUNCTION_ARGS) */ WorkerNode *dummyWorkerNode = GetFirstPrimaryWorkerNode(); - List *updateLocalGroupCommand = - list_make1(LocalGroupIdUpdateCommand(dummyWorkerNode->groupId)); - List *syncDistObjCommands = SyncDistributedObjectsCommandList(dummyWorkerNode); - List *dropSnapshotCommands = NodeMetadataDropCommands(); - List *createSnapshotCommands = NodeMetadataCreateCommands(); - List *pgDistTableMetadataSyncCommands = PgDistTableMetadataSyncCommandList(); + /* + * Create MetadataSyncContext which is used throughout nodes' activation. + * As we set collectCommands to true, it would not create connections to workers. + * Instead it would collect and return sync commands to be sent to workers. 
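+ *
+ * Rough sketch of the collect-only flow used below (names as in this file;
+ * illustrative, not additional test code):
+ *
+ *   MetadataSyncContext *context =
+ *       CreateMetadataSyncContext(list_make1(dummyWorkerNode),
+ *                                 collectCommands, nodesAddedInSameTransaction);
+ *   ActivateNodeList(context);
+ *   List *commands = context->collectedCommands;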
+ */ + bool collectCommands = true; + bool nodesAddedInSameTransaction = false; + MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(dummyWorkerNode), + collectCommands, + nodesAddedInSameTransaction); - List *activateNodeCommandList = NIL; + ActivateNodeList(context); + + List *activateNodeCommandList = context->collectedCommands; int activateNodeCommandIndex = 0; Oid ddlCommandTypeId = TEXTOID; - activateNodeCommandList = list_concat(activateNodeCommandList, - updateLocalGroupCommand); - activateNodeCommandList = list_concat(activateNodeCommandList, syncDistObjCommands); - activateNodeCommandList = list_concat(activateNodeCommandList, dropSnapshotCommands); - activateNodeCommandList = list_concat(activateNodeCommandList, - createSnapshotCommands); - activateNodeCommandList = list_concat(activateNodeCommandList, - pgDistTableMetadataSyncCommands); - int activateNodeCommandCount = list_length(activateNodeCommandList); Datum *activateNodeCommandDatumArray = palloc0(activateNodeCommandCount * sizeof(Datum)); diff --git a/src/backend/distributed/test/shard_rebalancer.c b/src/backend/distributed/test/shard_rebalancer.c index 60603f091..56a063982 100644 --- a/src/backend/distributed/test/shard_rebalancer.c +++ b/src/backend/distributed/test/shard_rebalancer.c @@ -147,6 +147,26 @@ shard_placement_rebalance_array(PG_FUNCTION_ARGS) shardPlacementList = SortList(shardPlacementList, CompareShardPlacements); shardPlacementListList = lappend(shardPlacementListList, shardPlacementList); + List *unbalancedShards = NIL; + ListCell *shardPlacementListCell = NULL; + foreach(shardPlacementListCell, shardPlacementListList) + { + List *placementList = (List *) lfirst(shardPlacementListCell); + + if (list_length(placementList) < list_length(workerNodeList)) + { + unbalancedShards = list_concat(unbalancedShards, + placementList); + shardPlacementListList = foreach_delete_current(shardPlacementListList, + shardPlacementListCell); + } + } + + if (list_length(unbalancedShards) > 0) + { + shardPlacementListList = lappend(shardPlacementListList, unbalancedShards); + } + rebalancePlanFunctions.context = &context; /* sort the lists to make the function more deterministic */ diff --git a/src/backend/distributed/transaction/backend_data.c b/src/backend/distributed/transaction/backend_data.c index 47d10353f..fc89fde9a 100644 --- a/src/backend/distributed/transaction/backend_data.c +++ b/src/backend/distributed/transaction/backend_data.c @@ -1270,23 +1270,6 @@ MyBackendGotCancelledDueToDeadlock(bool clearState) } -/* - * MyBackendIsInDisributedTransaction returns true if MyBackendData - * is in a distributed transaction. - */ -bool -MyBackendIsInDisributedTransaction(void) -{ - /* backend might not have used citus yet and thus not initialized backend data */ - if (!MyBackendData) - { - return false; - } - - return IsInDistributedTransaction(MyBackendData); -} - - /* * ActiveDistributedTransactionNumbers returns a list of pointers to * transaction numbers of distributed transactions that are in progress @@ -1452,6 +1435,21 @@ IsExternalClientBackend(void) } +/* + * IsRebalancerInitiatedBackend returns true if we are in a backend that citus + * rebalancer initiated. + */ +bool +IsCitusShardTransferBackend(void) +{ + int prefixLength = strlen(CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX); + + return strncmp(application_name, + CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX, + prefixLength) == 0; +} + + /* * DetermineCitusBackendType determines the type of backend based on the application_name. 
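 *
 * For example (application_name values are illustrative), a backend whose
 * application_name starts with "citus_shard_", such as the logical replication
 * worker applying a "citus_shard_move_subscription_..." subscription, is
 * recognized as a shard transfer backend by IsCitusShardTransferBackend()
 * above, while "citus_run_command gpid=..." marks a run_command backend.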
*/ diff --git a/src/backend/distributed/transaction/relation_access_tracking.c b/src/backend/distributed/transaction/relation_access_tracking.c index a6a8ba5f6..2ecbba5b7 100644 --- a/src/backend/distributed/transaction/relation_access_tracking.c +++ b/src/backend/distributed/transaction/relation_access_tracking.c @@ -195,7 +195,7 @@ RecordRelationAccessIfNonDistTable(Oid relationId, ShardPlacementAccessType acce * recursively calling RecordRelationAccessBase(), so becareful about * removing this check. */ - if (!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(relationId) && HasDistributionKey(relationId)) { return; } @@ -732,8 +732,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); - if (!(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY) && - cacheEntry->referencingRelationsViaForeignKey != NIL)) + if (HasDistributionKeyCacheEntry(cacheEntry) || + cacheEntry->referencingRelationsViaForeignKey == NIL) { return; } @@ -931,7 +931,7 @@ HoldsConflictingLockWithReferencedRelations(Oid relationId, ShardPlacementAccess * We're only interested in foreign keys to reference tables and citus * local tables. */ - if (!IsCitusTableType(referencedRelation, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (IsCitusTable(referencedRelation) && HasDistributionKey(referencedRelation)) { continue; } @@ -993,7 +993,7 @@ HoldsConflictingLockWithReferencingRelations(Oid relationId, ShardPlacementAcces CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); bool holdsConflictingLocks = false; - Assert(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)); + Assert(!HasDistributionKeyCacheEntry(cacheEntry)); Oid referencingRelation = InvalidOid; foreach_oid(referencingRelation, cacheEntry->referencingRelationsViaForeignKey) diff --git a/src/backend/distributed/transaction/transaction_management.c b/src/backend/distributed/transaction/transaction_management.c index 0f4c3c80a..5add48009 100644 --- a/src/backend/distributed/transaction/transaction_management.c +++ b/src/backend/distributed/transaction/transaction_management.c @@ -34,6 +34,7 @@ #include "distributed/multi_logical_replication.h" #include "distributed/multi_explain.h" #include "distributed/repartition_join_execution.h" +#include "distributed/replication_origin_session_utils.h" #include "distributed/transaction_management.h" #include "distributed/placement_connection.h" #include "distributed/relation_access_tracking.h" @@ -391,6 +392,9 @@ CoordinatedTransactionCallback(XactEvent event, void *arg) ResetGlobalVariables(); ResetRelationAccessHash(); + /* Reset any local replication origin session since transaction has been aborted.*/ + ResetReplicationOriginLocalSession(); + /* empty the CitusXactCallbackContext to ensure we're not leaking memory */ MemoryContextReset(CitusXactCallbackContext); @@ -715,6 +719,8 @@ CoordinatedSubTransactionCallback(SubXactEvent event, SubTransactionId subId, SetCreateCitusTransactionLevel(0); } + /* Reset any local replication origin session since subtransaction has been aborted.*/ + ResetReplicationOriginLocalSession(); MemoryContextSwitchTo(previousContext); break; diff --git a/src/backend/distributed/transaction/worker_transaction.c b/src/backend/distributed/transaction/worker_transaction.c index 486dd7280..b4a497647 100644 --- a/src/backend/distributed/transaction/worker_transaction.c +++ b/src/backend/distributed/transaction/worker_transaction.c 
@@ -374,6 +374,54 @@ SendCommandListToWorkerOutsideTransactionWithConnection(MultiConnection *workerC } +/* + * SendCommandListToWorkerListWithBareConnections sends the command list + * over the specified bare connections. This function is mainly useful to + * avoid opening an closing connections excessively by allowing reusing + * connections to send multiple separate bare commands. The function + * raises an error if any of the queries fail. + */ +void +SendCommandListToWorkerListWithBareConnections(List *workerConnectionList, + List *commandList) +{ + Assert(!InCoordinatedTransaction()); + Assert(!GetCoordinatedTransactionShouldUse2PC()); + + if (list_length(commandList) == 0 || list_length(workerConnectionList) == 0) + { + /* nothing to do */ + return; + } + + /* + * In order to avoid round-trips per query in queryStringList, + * we join the string and send as a single command. Also, + * if there is only a single command, avoid additional call to + * StringJoin given that some strings can be quite large. + */ + char *stringToSend = (list_length(commandList) == 1) ? + linitial(commandList) : StringJoin(commandList, ';'); + + /* send commands in parallel */ + MultiConnection *connection = NULL; + foreach_ptr(connection, workerConnectionList) + { + int querySent = SendRemoteCommand(connection, stringToSend); + if (querySent == 0) + { + ReportConnectionError(connection, ERROR); + } + } + + bool failOnError = true; + foreach_ptr(connection, workerConnectionList) + { + ClearResults(connection, failOnError); + } +} + + /* * SendCommandListToWorkerInCoordinatedTransaction opens connection to the node * with the given nodeName and nodePort. The commands are sent as part of the @@ -390,6 +438,8 @@ SendMetadataCommandListToWorkerListInCoordinatedTransaction(List *workerNodeList return; } + ErrorIfAnyMetadataNodeOutOfSync(workerNodeList); + UseCoordinatedTransaction(); List *connectionList = NIL; diff --git a/src/backend/distributed/utils/colocation_utils.c b/src/backend/distributed/utils/colocation_utils.c index aabfcdf62..985d4c38e 100644 --- a/src/backend/distributed/utils/colocation_utils.c +++ b/src/backend/distributed/utils/colocation_utils.c @@ -442,8 +442,7 @@ ShardsIntervalsEqual(ShardInterval *leftShardInterval, ShardInterval *rightShard { return HashPartitionedShardIntervalsEqual(leftShardInterval, rightShardInterval); } - else if (IsCitusTableType(leftShardInterval->relationId, - CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (!HasDistributionKey(leftShardInterval->relationId)) { /* * Reference tables has only a single shard and all reference tables diff --git a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c index 0b085c67a..687ce02a7 100644 --- a/src/backend/distributed/utils/reference_table_utils.c +++ b/src/backend/distributed/utils/reference_table_utils.c @@ -503,12 +503,11 @@ GetReferenceTableColocationId() /* - * DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over - * list of reference and replicated hash distributed tables and deletes - * all placements from pg_dist_placement table for given group. + * GetAllReplicatedTableList returns all tables which has replicated placements. + * i.e. 
(all reference tables) + (distributed tables with more than 1 placements) */ -void -DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly) +List * +GetAllReplicatedTableList(void) { List *referenceTableList = CitusTableTypeIdList(REFERENCE_TABLE); List *replicatedMetadataSyncedDistributedTableList = @@ -517,13 +516,25 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly) List *replicatedTableList = list_concat(referenceTableList, replicatedMetadataSyncedDistributedTableList); - /* if there are no reference tables, we do not need to do anything */ + return replicatedTableList; +} + + +/* + * ReplicatedPlacementsForNodeGroup filters all replicated placements for given + * node group id. + */ +List * +ReplicatedPlacementsForNodeGroup(int32 groupId) +{ + List *replicatedTableList = GetAllReplicatedTableList(); + if (list_length(replicatedTableList) == 0) { - return; + return NIL; } - StringInfo deletePlacementCommand = makeStringInfo(); + List *replicatedPlacementsForNodeGroup = NIL; Oid replicatedTableId = InvalidOid; foreach_oid(replicatedTableId, replicatedTableList) { @@ -538,25 +549,104 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly) continue; } - GroupShardPlacement *placement = NULL; - foreach_ptr(placement, placements) - { - LockShardDistributionMetadata(placement->shardId, ExclusiveLock); - - DeleteShardPlacementRow(placement->placementId); - - if (!localOnly) - { - resetStringInfo(deletePlacementCommand); - appendStringInfo(deletePlacementCommand, - "DELETE FROM pg_catalog.pg_dist_placement " - "WHERE placementid = " UINT64_FORMAT, - placement->placementId); - - SendCommandToWorkersWithMetadata(deletePlacementCommand->data); - } - } + replicatedPlacementsForNodeGroup = list_concat(replicatedPlacementsForNodeGroup, + placements); } + + return replicatedPlacementsForNodeGroup; +} + + +/* + * DeleteShardPlacementCommand returns a command for deleting given placement from + * metadata. + */ +char * +DeleteShardPlacementCommand(uint64 placementId) +{ + StringInfo deletePlacementCommand = makeStringInfo(); + appendStringInfo(deletePlacementCommand, + "DELETE FROM pg_catalog.pg_dist_placement " + "WHERE placementid = " UINT64_FORMAT, placementId); + return deletePlacementCommand->data; +} + + +/* + * DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over + * list of reference and replicated hash distributed tables and deletes + * all placements from pg_dist_placement table for given group. + */ +void +DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly) +{ + List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId); + + /* if there are no replicated tables for the group, we do not need to do anything */ + if (list_length(replicatedPlacementListForGroup) == 0) + { + return; + } + + GroupShardPlacement *placement = NULL; + foreach_ptr(placement, replicatedPlacementListForGroup) + { + LockShardDistributionMetadata(placement->shardId, ExclusiveLock); + + if (!localOnly) + { + char *deletePlacementCommand = + DeleteShardPlacementCommand(placement->placementId); + + SendCommandToWorkersWithMetadata(deletePlacementCommand); + } + + DeleteShardPlacementRow(placement->placementId); + } +} + + +/* + * DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext does the same as + * DeleteAllReplicatedTablePlacementsFromNodeGroup except it uses metadataSyncContext for + * connections. 
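+ *
+ * For each replicated placement the same metadata command is produced as in
+ * DeleteShardPlacementCommand(), e.g. (placement id is illustrative):
+ *
+ *   DELETE FROM pg_catalog.pg_dist_placement WHERE placementid = 100070
+ *
+ * but it is sent, or merely collected, through the metadata sync context
+ * rather than over a regular metadata connection.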
+ */ +void +DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext( + MetadataSyncContext *context, int32 groupId, bool localOnly) +{ + List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId); + + /* if there are no replicated tables for the group, we do not need to do anything */ + if (list_length(replicatedPlacementListForGroup) == 0) + { + return; + } + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + GroupShardPlacement *placement = NULL; + foreach_ptr(placement, replicatedPlacementListForGroup) + { + LockShardDistributionMetadata(placement->shardId, ExclusiveLock); + + if (!localOnly) + { + char *deletePlacementCommand = + DeleteShardPlacementCommand(placement->placementId); + + SendOrCollectCommandListToMetadataNodes(context, + list_make1(deletePlacementCommand)); + } + + /* do not execute local transaction if we collect commands */ + if (!MetadataSyncCollectsCommands(context)) + { + DeleteShardPlacementRow(placement->placementId); + } + + ResetMetadataSyncMemoryContext(context); + } + MemoryContextSwitchTo(oldContext); } diff --git a/src/backend/distributed/utils/replication_origin_session_utils.c b/src/backend/distributed/utils/replication_origin_session_utils.c new file mode 100644 index 000000000..dbd244271 --- /dev/null +++ b/src/backend/distributed/utils/replication_origin_session_utils.c @@ -0,0 +1,239 @@ +/*------------------------------------------------------------------------- + * + * replication_origin_session_utils.c + * Functions for managing replication origin session. + * + * Copyright (c) Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#include "distributed/replication_origin_session_utils.h" +#include "distributed/remote_commands.h" +#include "distributed/metadata_cache.h" +#include "utils/builtins.h" +#include "miscadmin.h" + +static bool IsRemoteReplicationOriginSessionSetup(MultiConnection *connection); + +static void SetupMemoryContextResetReplicationOriginHandler(void); + +static void SetupReplicationOriginSessionHelper(bool isContexResetSetupNeeded); + +static inline bool IsLocalReplicationOriginSessionActive(void); + +PG_FUNCTION_INFO_V1(citus_internal_start_replication_origin_tracking); +PG_FUNCTION_INFO_V1(citus_internal_stop_replication_origin_tracking); +PG_FUNCTION_INFO_V1(citus_internal_is_replication_origin_tracking_active); + +/* + * This variable is used to remember the replication origin id of the current session + * before resetting it to DoNotReplicateId in SetupReplicationOriginLocalSession. + */ +static RepOriginId OriginalOriginId = InvalidRepOriginId; + +/* + * Setting that controls whether replication origin tracking is enabled + */ +bool EnableChangeDataCapture = false; + + +/* citus_internal_start_replication_origin_tracking starts a new replication origin session + * in the local node. This function is used to avoid publishing the WAL records to the + * replication slot by setting replication origin to DoNotReplicateId in WAL records. + * It remembers the previous replication origin for the current session which will be + * used to reset the replication origin to the previous value when the session ends. 
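+ *
+ * A hedged usage sketch, mirroring what SetupReplicationOriginRemoteSession()
+ * and ResetReplicationOriginRemoteSession() below run over a remote
+ * connection:
+ *
+ *   SELECT pg_catalog.citus_internal_start_replication_origin_tracking();
+ *   -- ... internal data movement writes that CDC should not publish ...
+ *   SELECT pg_catalog.citus_internal_stop_replication_origin_tracking();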
+ */ +Datum +citus_internal_start_replication_origin_tracking(PG_FUNCTION_ARGS) +{ + if (!EnableChangeDataCapture) + { + PG_RETURN_VOID(); + } + SetupReplicationOriginSessionHelper(false); + PG_RETURN_VOID(); +} + + +/* citus_internal_stop_replication_origin_tracking ends the current replication origin session + * in the local node. This function is used to reset the replication origin to the + * earlier value of replication origin. + */ +Datum +citus_internal_stop_replication_origin_tracking(PG_FUNCTION_ARGS) +{ + ResetReplicationOriginLocalSession(); + PG_RETURN_VOID(); +} + + +/* citus_internal_is_replication_origin_tracking_active checks if the current replication origin + * session is active in the local node. + */ +Datum +citus_internal_is_replication_origin_tracking_active(PG_FUNCTION_ARGS) +{ + bool result = IsLocalReplicationOriginSessionActive(); + PG_RETURN_BOOL(result); +} + + +/* IsLocalReplicationOriginSessionActive checks if the current replication origin + * session is active in the local node. + */ +inline bool +IsLocalReplicationOriginSessionActive(void) +{ + return (replorigin_session_origin == DoNotReplicateId); +} + + +/* + * SetupMemoryContextResetReplicationOriginHandler registers a callback function + * that resets the replication origin session in case of any error for the current + * memory context. + */ +static void +SetupMemoryContextResetReplicationOriginHandler() +{ + MemoryContextCallback *replicationOriginResetCallback = palloc0( + sizeof(MemoryContextCallback)); + replicationOriginResetCallback->func = + ResetReplicationOriginLocalSessionCallbackHandler; + replicationOriginResetCallback->arg = NULL; + MemoryContextRegisterResetCallback(CurrentMemoryContext, + replicationOriginResetCallback); +} + + +/* + * SetupReplicationOriginSessionHelper sets up a new replication origin session in a + * local session. It takes an argument isContexResetSetupNeeded to decide whether + * to register a callback function that resets the replication origin session in case + * of any error for the current memory context. + */ +static void +SetupReplicationOriginSessionHelper(bool isContexResetSetupNeeded) +{ + if (!EnableChangeDataCapture) + { + return; + } + OriginalOriginId = replorigin_session_origin; + replorigin_session_origin = DoNotReplicateId; + if (isContexResetSetupNeeded) + { + SetupMemoryContextResetReplicationOriginHandler(); + } +} + + +/* + * SetupReplicationOriginLocalSession sets up a new replication origin session in a + * local session. + */ +void +SetupReplicationOriginLocalSession() +{ + SetupReplicationOriginSessionHelper(true); +} + + +/* + * ResetReplicationOriginLocalSession resets the replication origin session in a + * local node. + */ +void +ResetReplicationOriginLocalSession(void) +{ + if (replorigin_session_origin != DoNotReplicateId) + { + return; + } + + replorigin_session_origin = OriginalOriginId; +} + + +/* + * ResetReplicationOriginLocalSessionCallbackHandler is a callback function that + * resets the replication origin session in a local node. This is used to register + * with MemoryContextRegisterResetCallback to reset the replication origin session + * in case of any error for the given memory context. + */ +void +ResetReplicationOriginLocalSessionCallbackHandler(void *arg) +{ + ResetReplicationOriginLocalSession(); +} + + +/* + * SetupReplicationOriginRemoteSession sets up a new replication origin session in a + * remote session. The identifier is used to create a unique replication origin name + * for the session in the remote node. 
+ */ +void +SetupReplicationOriginRemoteSession(MultiConnection *connection) +{ + if (!EnableChangeDataCapture) + { + return; + } + if (connection != NULL && !IsRemoteReplicationOriginSessionSetup(connection)) + { + StringInfo replicationOriginSessionSetupQuery = makeStringInfo(); + appendStringInfo(replicationOriginSessionSetupQuery, + "select pg_catalog.citus_internal_start_replication_origin_tracking();"); + ExecuteCriticalRemoteCommand(connection, + replicationOriginSessionSetupQuery->data); + connection->isReplicationOriginSessionSetup = true; + } +} + + +/* + * ResetReplicationOriginRemoteSession resets the replication origin session in a + * remote node. + */ +void +ResetReplicationOriginRemoteSession(MultiConnection *connection) +{ + if (connection != NULL && connection->isReplicationOriginSessionSetup) + { + StringInfo replicationOriginSessionResetQuery = makeStringInfo(); + appendStringInfo(replicationOriginSessionResetQuery, + "select pg_catalog.citus_internal_stop_replication_origin_tracking();"); + ExecuteCriticalRemoteCommand(connection, + replicationOriginSessionResetQuery->data); + connection->isReplicationOriginSessionSetup = false; + } +} + + +/* + * IsRemoteReplicationOriginSessionSetup checks if the replication origin is setup + * already in the remote session by calliing the UDF + * citus_internal_is_replication_origin_tracking_active(). This is also remembered + * in the connection object to avoid calling the UDF again next time. + */ +static bool +IsRemoteReplicationOriginSessionSetup(MultiConnection *connection) +{ + if (connection->isReplicationOriginSessionSetup) + { + return true; + } + + StringInfo isReplicationOriginSessionSetupQuery = makeStringInfo(); + appendStringInfo(isReplicationOriginSessionSetupQuery, + "SELECT pg_catalog.citus_internal_is_replication_origin_tracking_active()"); + bool result = + ExecuteRemoteCommandAndCheckResult(connection, + isReplicationOriginSessionSetupQuery->data, + "t"); + + connection->isReplicationOriginSessionSetup = result; + return result; +} diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index cc4cb0d88..7b8edf758 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -503,45 +503,6 @@ SetLocktagForShardDistributionMetadata(int64 shardId, LOCKTAG *tag) } -/* - * LockPlacementCleanup takes an exclusive lock to ensure that only one process - * can cleanup placements at the same time. - */ -void -LockPlacementCleanup(void) -{ - LOCKTAG tag; - const bool sessionLock = false; - const bool dontWait = false; - - /* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE. - * This will change as we add support for parallel moves. - */ - SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE); - (void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait); -} - - -/* - * TryLockPlacementCleanup takes an exclusive lock to ensure that only one - * process can cleanup placements at the same time. - */ -bool -TryLockPlacementCleanup(void) -{ - LOCKTAG tag; - const bool sessionLock = false; - const bool dontWait = true; - - /* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE. - * This will change as we add support for parallel moves. 
- */ - SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE); - bool lockAcquired = LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait); - return lockAcquired; -} - - /* * LockReferencedReferenceShardDistributionMetadata acquires shard distribution * metadata locks with the given lock mode on the reference tables which has a diff --git a/src/backend/distributed/utils/shardinterval_utils.c b/src/backend/distributed/utils/shardinterval_utils.c index 2980d11a4..12635f9f4 100644 --- a/src/backend/distributed/utils/shardinterval_utils.c +++ b/src/backend/distributed/utils/shardinterval_utils.c @@ -223,8 +223,7 @@ ShardIndex(ShardInterval *shardInterval) * currently it is not required. */ if (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) && - !IsCitusTableTypeCacheEntry( - cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + HasDistributionKeyCacheEntry(cacheEntry)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("finding index of a given shard is only supported for " @@ -233,7 +232,7 @@ ShardIndex(ShardInterval *shardInterval) } /* short-circuit for reference tables */ - if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* * Reference tables and citus local tables have only a single shard, @@ -333,7 +332,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry) shardIndex = CalculateUniformHashRangeIndex(hashedValue, shardCount); } } - else if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) + else if (!HasDistributionKeyCacheEntry(cacheEntry)) { /* non-distributed tables have a single shard, all values mapped to that shard */ Assert(shardCount == 1); diff --git a/src/backend/distributed/worker/worker_create_or_replace.c b/src/backend/distributed/worker/worker_create_or_replace.c index 572cf1420..b40f712b5 100644 --- a/src/backend/distributed/worker/worker_create_or_replace.c +++ b/src/backend/distributed/worker/worker_create_or_replace.c @@ -35,8 +35,22 @@ #include "distributed/worker_create_or_replace.h" #include "distributed/worker_protocol.h" + +/* + * OnCollisionAction describes what to do when the created object + * and existing object do not match. + */ +typedef enum OnCollisionAction +{ + ON_COLLISION_RENAME, + ON_COLLISION_DROP +} OnCollisionAction; + + static List * CreateStmtListByObjectAddress(const ObjectAddress *address); static bool CompareStringList(List *list1, List *list2); +static OnCollisionAction GetOnCollisionAction(const ObjectAddress *address); + PG_FUNCTION_INFO_V1(worker_create_or_replace_object); PG_FUNCTION_INFO_V1(worker_create_or_replace_object_array); @@ -192,7 +206,8 @@ WorkerCreateOrReplaceObject(List *sqlStatements) /* * Object with name from statement is already found locally, check if states are * identical. If objects differ we will rename the old object (non- destructively) - * as to make room to create the new object according to the spec sent. + * or drop it (if safe) as to make room to create the new object according to the + * spec sent. 
*/ /* @@ -213,11 +228,22 @@ WorkerCreateOrReplaceObject(List *sqlStatements) return false; } - char *newName = GenerateBackupNameForCollision(address); + Node *utilityStmt = NULL; - RenameStmt *renameStmt = CreateRenameStatement(address, newName); - const char *sqlRenameStmt = DeparseTreeNode((Node *) renameStmt); - ProcessUtilityParseTree((Node *) renameStmt, sqlRenameStmt, + if (GetOnCollisionAction(address) == ON_COLLISION_DROP) + { + /* drop the existing object */ + utilityStmt = (Node *) CreateDropStmt(address); + } + else + { + /* rename the existing object */ + char *newName = GenerateBackupNameForCollision(address); + utilityStmt = (Node *) CreateRenameStatement(address, newName); + } + + const char *commandString = DeparseTreeNode(utilityStmt); + ProcessUtilityParseTree(utilityStmt, commandString, PROCESS_UTILITY_QUERY, NULL, None_Receiver, NULL); } @@ -286,6 +312,11 @@ CreateStmtListByObjectAddress(const ObjectAddress *address) return list_make1(GetFunctionDDLCommand(address->objectId, false)); } + case OCLASS_PUBLICATION: + { + return list_make1(CreatePublicationDDLCommand(address->objectId)); + } + case OCLASS_TSCONFIG: { List *stmts = GetCreateTextSearchConfigStatements(address); @@ -312,6 +343,37 @@ CreateStmtListByObjectAddress(const ObjectAddress *address) } +/* + * GetOnCollisionAction decides what to do when the object already exists. + */ +static OnCollisionAction +GetOnCollisionAction(const ObjectAddress *address) +{ + switch (getObjectClass(address)) + { + case OCLASS_PUBLICATION: + { + /* + * We prefer to drop publications because they can be + * harmful (cause update/delete failures) and are relatively + * safe to drop. + */ + return ON_COLLISION_DROP; + } + + case OCLASS_COLLATION: + case OCLASS_PROC: + case OCLASS_TSCONFIG: + case OCLASS_TSDICT: + case OCLASS_TYPE: + default: + { + return ON_COLLISION_RENAME; + } + } +} + + /* * GenerateBackupNameForCollision calculate a backup name for a given object by its * address. This name should be used when renaming an existing object before creating the @@ -362,6 +424,64 @@ GenerateBackupNameForCollision(const ObjectAddress *address) } +/* + * CreateDropPublicationStmt creates a DROP PUBLICATION statement for the + * publication at the given address. + */ +static DropStmt * +CreateDropPublicationStmt(const ObjectAddress *address) +{ + Assert(address->classId == PublicationRelationId); + + DropStmt *dropStmt = makeNode(DropStmt); + dropStmt->removeType = OBJECT_PUBLICATION; + dropStmt->behavior = DROP_RESTRICT; + + HeapTuple publicationTuple = + SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(address->objectId)); + + if (!HeapTupleIsValid(publicationTuple)) + { + ereport(ERROR, (errmsg("cannot find publication with oid: %d", + address->objectId))); + } + + Form_pg_publication publicationForm = + (Form_pg_publication) GETSTRUCT(publicationTuple); + + char *publicationName = NameStr(publicationForm->pubname); + dropStmt->objects = list_make1(makeString(publicationName)); + + ReleaseSysCache(publicationTuple); + + return dropStmt; +} + + +/* + * CreateDropStmt returns a DROP statement for the given object. 
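+ *
+ * Currently only publications are handled; for those the returned parse tree
+ * corresponds to a statement of the form (publication name is illustrative):
+ *
+ *   DROP PUBLICATION citus_shard_move_publication_xxx RESTRICT;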
+ */ +DropStmt * +CreateDropStmt(const ObjectAddress *address) +{ + switch (getObjectClass(address)) + { + case OCLASS_PUBLICATION: + { + return CreateDropPublicationStmt(address); + } + + default: + { + break; + } + } + + ereport(ERROR, (errmsg("unsupported object to construct a drop statement"), + errdetail("unable to generate a parsetree for the drop"))); +} + + /* * CreateRenameTypeStmt creates a rename statement for a type based on its ObjectAddress. * The rename statement will rename the existing object on its address to the value diff --git a/src/backend/distributed/worker/worker_data_fetch_protocol.c b/src/backend/distributed/worker/worker_data_fetch_protocol.c index d563c443b..11fdda287 100644 --- a/src/backend/distributed/worker/worker_data_fetch_protocol.c +++ b/src/backend/distributed/worker/worker_data_fetch_protocol.c @@ -70,6 +70,7 @@ static void AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequence PG_FUNCTION_INFO_V1(worker_apply_shard_ddl_command); PG_FUNCTION_INFO_V1(worker_apply_inter_shard_ddl_command); PG_FUNCTION_INFO_V1(worker_apply_sequence_command); +PG_FUNCTION_INFO_V1(worker_adjust_identity_column_seq_ranges); PG_FUNCTION_INFO_V1(worker_append_table_to_shard); PG_FUNCTION_INFO_V1(worker_nextval); @@ -133,6 +134,60 @@ worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS) } +/* + * worker_adjust_identity_column_seq_ranges takes a table oid, runs an ALTER SEQUENCE statement + * for each identity column to adjust the minvalue and maxvalue of the sequence owned by + * identity column such that the sequence creates globally unique values. + * We use table oid instead of sequence name to avoid any potential conflicts between sequences of different tables. This way, we can safely iterate through identity columns on a specific table without any issues. While this may introduce a small amount of business logic to workers, it's a much safer approach overall. 
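+ *
+ * A hedged usage sketch (table name is illustrative); in practice this runs on
+ * worker nodes while distributing a table that has identity columns:
+ *
+ *   SELECT pg_catalog.worker_adjust_identity_column_seq_ranges('my_dist_table'::regclass);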
+ */ +Datum +worker_adjust_identity_column_seq_ranges(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + + Oid tableRelationId = PG_GETARG_OID(0); + + EnsureTableOwner(tableRelationId); + + Relation tableRelation = relation_open(tableRelationId, AccessShareLock); + TupleDesc tableTupleDesc = RelationGetDescr(tableRelation); + + bool missingSequenceOk = false; + + for (int attributeIndex = 0; attributeIndex < tableTupleDesc->natts; + attributeIndex++) + { + Form_pg_attribute attributeForm = TupleDescAttr(tableTupleDesc, + attributeIndex); + + /* skip dropped columns */ + if (attributeForm->attisdropped) + { + continue; + } + + if (attributeForm->attidentity) + { + Oid sequenceOid = getIdentitySequence(tableRelationId, + attributeForm->attnum, + missingSequenceOk); + + Oid sequenceSchemaOid = get_rel_namespace(sequenceOid); + char *sequenceSchemaName = get_namespace_name(sequenceSchemaOid); + char *sequenceName = get_rel_name(sequenceOid); + Oid sequenceTypeId = pg_get_sequencedef(sequenceOid)->seqtypid; + + AlterSequenceMinMax(sequenceOid, sequenceSchemaName, sequenceName, + sequenceTypeId); + } + } + + relation_close(tableRelation, NoLock); + + PG_RETURN_VOID(); +} + + /* * worker_apply_sequence_command takes a CREATE SEQUENCE command string, runs the * CREATE SEQUENCE command then creates and runs an ALTER SEQUENCE statement diff --git a/src/backend/distributed/worker/worker_shard_visibility.c b/src/backend/distributed/worker/worker_shard_visibility.c index 9d041f4a9..63a9cca34 100644 --- a/src/backend/distributed/worker/worker_shard_visibility.c +++ b/src/backend/distributed/worker/worker_shard_visibility.c @@ -351,18 +351,17 @@ ShouldHideShardsInternal(void) return false; } } - else if (MyBackendType != B_BACKEND) + else if (MyBackendType != B_BACKEND && MyBackendType != B_WAL_SENDER) { /* * We are aiming only to hide shards from client * backends or certain background workers(see above), - * not backends like walsender or checkpointer. 
*/ return false; } if (IsCitusInternalBackend() || IsRebalancerInternalBackend() || - IsCitusRunCommandBackend()) + IsCitusRunCommandBackend() || IsCitusShardTransferBackend()) { /* we never hide shards from Citus */ return false; diff --git a/src/include/distributed/adaptive_executor.h b/src/include/distributed/adaptive_executor.h index 0a4735ee7..7198858a0 100644 --- a/src/include/distributed/adaptive_executor.h +++ b/src/include/distributed/adaptive_executor.h @@ -14,7 +14,6 @@ extern int ExecutorSlowStartInterval; extern bool EnableCostBasedConnectionEstablishment; extern bool PreventIncompleteConnectionEstablishment; -extern bool ShouldRunTasksSequentially(List *taskList); extern uint64 ExecuteTaskList(RowModifyLevel modLevel, List *taskList); extern uint64 ExecuteUtilityTaskList(List *utilityTaskList, bool localExecutionSupported); extern uint64 ExecuteUtilityTaskListExtended(List *utilityTaskList, int poolSize, diff --git a/src/include/distributed/backend_data.h b/src/include/distributed/backend_data.h index 0fb41b7e7..1fcd31141 100644 --- a/src/include/distributed/backend_data.h +++ b/src/include/distributed/backend_data.h @@ -66,7 +66,6 @@ extern int ExtractProcessIdFromGlobalPID(uint64 globalPID); extern void GetBackendDataForProc(PGPROC *proc, BackendData *result); extern void CancelTransactionDueToDeadlock(PGPROC *proc); extern bool MyBackendGotCancelledDueToDeadlock(bool clearState); -extern bool MyBackendIsInDisributedTransaction(void); extern List * ActiveDistributedTransactionNumbers(void); extern LocalTransactionId GetMyProcLocalTransactionId(void); extern int GetExternalClientBackendCount(void); @@ -77,6 +76,7 @@ extern bool IsCitusInternalBackend(void); extern bool IsRebalancerInternalBackend(void); extern bool IsCitusRunCommandBackend(void); extern bool IsExternalClientBackend(void); +extern bool IsCitusShardTransferBackend(void); #define INVALID_CITUS_INTERNAL_BACKEND_GPID 0 #define GLOBAL_PID_NODE_ID_FOR_NODES_NOT_IN_METADATA 99999999 diff --git a/src/include/distributed/commands.h b/src/include/distributed/commands.h index c3ec4fafb..dba63c659 100644 --- a/src/include/distributed/commands.h +++ b/src/include/distributed/commands.h @@ -409,6 +409,24 @@ extern void DropPolicyEventExtendNames(DropStmt *stmt, const char *schemaName, u extern void AddRangeTableEntryToQueryCompat(ParseState *parseState, Relation relation); +/* publication.c - forward declarations */ +extern List * PostProcessCreatePublicationStmt(Node *node, const char *queryString); +extern List * CreatePublicationDDLCommandsIdempotent(const ObjectAddress *address); +extern char * CreatePublicationDDLCommand(Oid publicationId); +extern List * PreprocessAlterPublicationStmt(Node *stmt, const char *queryString, + ProcessUtilityContext processUtilityCtx); +extern List * GetAlterPublicationDDLCommandsForTable(Oid relationId, bool isAdd); +extern char * GetAlterPublicationTableDDLCommand(Oid publicationId, Oid relationId, + bool isAdd); +extern List * AlterPublicationOwnerStmtObjectAddress(Node *node, bool missingOk, + bool isPostProcess); +extern List * AlterPublicationStmtObjectAddress(Node *node, bool missingOk, + bool isPostProcess); +extern List * CreatePublicationStmtObjectAddress(Node *node, bool missingOk, + bool isPostProcess); +extern List * RenamePublicationStmtObjectAddress(Node *node, bool missingOk, + bool isPostProcess); + /* rename.c - forward declarations*/ extern List * PreprocessRenameStmt(Node *renameStmt, const char *renameCommand, ProcessUtilityContext processUtilityContext); @@ -566,6 
+584,9 @@ extern bool ConstrTypeCitusCanDefaultName(ConstrType constrType); extern char * GetAlterColumnWithNextvalDefaultCmd(Oid sequenceOid, Oid relationId, char *colname, bool missingTableOk); +extern void ErrorIfTableHasUnsupportedIdentityColumn(Oid relationId); +extern void ErrorIfTableHasIdentityColumn(Oid relationId); + /* text_search.c - forward declarations */ extern List * GetCreateTextSearchConfigStatements(const ObjectAddress *address); extern List * GetCreateTextSearchDictionaryStatements(const ObjectAddress *address); @@ -657,7 +678,6 @@ extern List * PreprocessDropViewStmt(Node *node, const char *queryString, extern List * DropViewStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess); extern char * CreateViewDDLCommand(Oid viewOid); extern List * GetViewCreationCommandsOfTable(Oid relationId); -extern List * GetViewCreationTableDDLCommandsOfTable(Oid relationId); extern char * AlterViewOwnerCommand(Oid viewOid); extern char * DeparseViewStmt(Node *node); extern char * DeparseDropViewStmt(Node *node); diff --git a/src/include/distributed/commands/multi_copy.h b/src/include/distributed/commands/multi_copy.h index 13d589a3a..689725e70 100644 --- a/src/include/distributed/commands/multi_copy.h +++ b/src/include/distributed/commands/multi_copy.h @@ -133,6 +133,12 @@ typedef struct CitusCopyDestReceiver /* if true, should copy to local placements in the current session */ bool shouldUseLocalCopy; + /* + * if true, the data from this dest receiver should be published for CDC clients. + * This is set tot false for internal transfers like shard split/move/rebalance etc. + */ + bool isPublishable; + /* * Copy into colocated intermediate result. When this is set, the * COPY assumes there are hypothetical colocated shards to the @@ -161,7 +167,8 @@ extern CitusCopyDestReceiver * CreateCitusCopyDestReceiver(Oid relationId, List *columnNameList, int partitionColumnIndex, EState *executorState, - char *intermediateResultPrefix); + char *intermediateResultPrefix, + bool isPublishable); extern FmgrInfo * ColumnOutputFunctions(TupleDesc rowDescriptor, bool binaryFormat); extern bool CanUseBinaryCopyFormat(TupleDesc tupleDescription); extern bool CanUseBinaryCopyFormatForTargetList(List *targetEntryList); diff --git a/src/include/distributed/connection_management.h b/src/include/distributed/connection_management.h index 4ffb83a86..8c2584451 100644 --- a/src/include/distributed/connection_management.h +++ b/src/include/distributed/connection_management.h @@ -42,10 +42,31 @@ /* application name used for connections made by run_command_on_* */ #define CITUS_RUN_COMMAND_APPLICATION_NAME_PREFIX "citus_run_command gpid=" +/* + * application name prefix for move/split replication connections. + * + * This application_name is set to the subscription name by logical replication + * workers, so there is no GPID. + */ +#define CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX "citus_shard_" + /* deal with waiteventset errors */ #define WAIT_EVENT_SET_INDEX_NOT_INITIALIZED -1 #define WAIT_EVENT_SET_INDEX_FAILED -2 +/* + * UINT32_MAX is reserved in pg_dist_node, so we can use it safely. + */ +#define LOCAL_NODE_ID UINT32_MAX + +/* + * If you want to connect to the current node use `LocalHostName`, which is a GUC, instead + * of the hardcoded loopback hostname. Only if you really need the loopback hostname use + * this define. 
+ */ +#define LOCAL_HOST_NAME "localhost" + + /* forward declare, to avoid forcing large headers on everyone */ struct pg_conn; /* target of the PGconn typedef */ struct MemoryContextData; @@ -173,6 +194,9 @@ typedef struct MultiConnection /* is the connection currently in use, and shouldn't be used by anything else */ bool claimedExclusively; + /* whether the replication origin session has already been set up for this connection. */ + bool isReplicationOriginSessionSetup; + /* * Should be used to access/modify metadata. See REQUIRE_METADATA_CONNECTION for * the details. @@ -312,6 +336,7 @@ extern void ShutdownConnection(MultiConnection *connection); /* dealing with a connection */ extern void FinishConnectionListEstablishment(List *multiConnectionList); extern void FinishConnectionEstablishment(MultiConnection *connection); +extern void ForceConnectionCloseAtTransactionEnd(MultiConnection *connection); extern void ClaimConnectionExclusively(MultiConnection *connection); extern void UnclaimConnection(MultiConnection *connection); extern void MarkConnectionConnected(MultiConnection *connection); diff --git a/src/include/distributed/coordinator_protocol.h b/src/include/distributed/coordinator_protocol.h index 1444bff91..ad8329a6c 100644 --- a/src/include/distributed/coordinator_protocol.h +++ b/src/include/distributed/coordinator_protocol.h @@ -124,8 +124,7 @@ typedef enum IncludeSequenceDefaults typedef enum IncludeIdentities { NO_IDENTITY = 0, /* don't include identities */ - INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS = 1, /* include identities as sequences */ - INCLUDE_IDENTITY = 2 /* include identities as-is*/ + INCLUDE_IDENTITY = 1 /* include identities as-is*/ } IncludeIdentities; @@ -311,8 +310,6 @@ extern ShardPlacement * SearchShardPlacementInList(List *shardPlacementList, extern ShardPlacement * SearchShardPlacementInListOrError(List *shardPlacementList, const char *nodeName, uint32 nodePort); -extern void ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort); extern char LookupShardTransferMode(Oid shardReplicationModeOid); extern void BlockWritesToShardList(List *shardList); extern List * WorkerApplyShardDDLCommandList(List *ddlCommandList, int64 shardId); diff --git a/src/include/distributed/deparser.h b/src/include/distributed/deparser.h index 87704b628..0d4f605d8 100644 --- a/src/include/distributed/deparser.h +++ b/src/include/distributed/deparser.h @@ -210,6 +210,23 @@ extern char * DeparseAlterExtensionStmt(Node *stmt); /* forward declarations for deparse_database_stmts.c */ extern char * DeparseAlterDatabaseOwnerStmt(Node *node); +/* forward declarations for deparse_publication_stmts.c */ +extern char * DeparseCreatePublicationStmt(Node *stmt); +extern char * DeparseCreatePublicationStmtExtended(Node *node, + bool whereClauseNeedsTransform, + bool includeLocalTables); +extern char * DeparseAlterPublicationStmt(Node *stmt); +extern char * DeparseAlterPublicationStmtExtended(Node *stmt, + bool whereClauseNeedsTransform, + bool includeLocalTables); +extern char * DeparseAlterPublicationOwnerStmt(Node *stmt); +extern char * DeparseAlterPublicationSchemaStmt(Node *node); +extern char * DeparseDropPublicationStmt(Node *stmt); +extern char * DeparseRenamePublicationStmt(Node *node); + +extern void QualifyCreatePublicationStmt(Node *node); +extern void QualifyAlterPublicationStmt(Node *node); + /* forward declatations for deparse_text_search_stmts.c */ extern void QualifyAlterTextSearchConfigurationOwnerStmt(Node *node); extern void
QualifyAlterTextSearchConfigurationSchemaStmt(Node *node); diff --git a/src/include/distributed/distributed_execution_locks.h b/src/include/distributed/distributed_execution_locks.h index b274cd459..e789843ae 100644 --- a/src/include/distributed/distributed_execution_locks.h +++ b/src/include/distributed/distributed_execution_locks.h @@ -16,10 +16,14 @@ #include "storage/lockdefs.h" #include "distributed/multi_physical_planner.h" +extern void AcquireExecutorShardLocksForExecution(RowModifyLevel modLevel, + List *taskList); extern void AcquireExecutorShardLocksForRelationRowLockList(List *relationRowLockList); extern bool RequiresConsistentSnapshot(Task *task); extern void AcquireMetadataLocks(List *taskList); extern void LockPartitionsInRelationList(List *relationIdList, LOCKMODE lockmode); extern void LockPartitionRelations(Oid relationId, LOCKMODE lockMode); +extern void LockPartitionsForDistributedPlan(DistributedPlan *distributedPlan); + #endif /* DISTRIBUTED_EXECUTION_LOCKS_H */ diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index 29c3c7154..412859449 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -255,6 +255,4 @@ extern struct DistributedPlan * CreateDistributedPlan(uint64 planId, PlannerRestrictionContext * plannerRestrictionContext); -extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte); - #endif /* DISTRIBUTED_PLANNER_H */ diff --git a/src/include/distributed/executor_util.h b/src/include/distributed/executor_util.h new file mode 100644 index 000000000..8560c6dfd --- /dev/null +++ b/src/include/distributed/executor_util.h @@ -0,0 +1,44 @@ +/*------------------------------------------------------------------------- + * + * executor_util.h + * Utility functions for executing task lists. 
+ * + *------------------------------------------------------------------------- + */ +#ifndef EXECUTOR_UTIL_H +#define EXECUTOR_UTIL_H + +#include "fmgr.h" +#include "funcapi.h" + +#include "access/tupdesc.h" +#include "distributed/multi_physical_planner.h" +#include "nodes/params.h" +#include "nodes/pg_list.h" + + +/* utility functions for dealing with tasks in the executor */ +extern bool TaskListModifiesDatabase(RowModifyLevel modLevel, List *taskList); +extern bool TaskListRequiresRollback(List *taskList); +extern bool TaskListRequires2PC(List *taskList); +extern bool TaskListCannotBeExecutedInTransaction(List *taskList); +extern bool SelectForUpdateOnReferenceTable(List *taskList); +extern bool ReadOnlyTask(TaskType taskType); +extern bool ModifiedTableReplicated(List *taskList); +extern bool ShouldRunTasksSequentially(List *taskList); + +/* utility functions for handling parameters in the executor */ +extern void ExtractParametersForRemoteExecution(ParamListInfo paramListInfo, + Oid **parameterTypes, + const char ***parameterValues); +extern void ExtractParametersFromParamList(ParamListInfo paramListInfo, + Oid **parameterTypes, + const char ***parameterValues, bool + useOriginalCustomTypeOids); + +/* utility functions for processing tuples in the executor */ +extern AttInMetadata * TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc); +extern HeapTuple BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values); + + +#endif /* EXECUTOR_UTIL_H */ diff --git a/src/include/distributed/intermediate_result_pruning.h b/src/include/distributed/intermediate_result_pruning.h index 5880cd23c..fcd6027ef 100644 --- a/src/include/distributed/intermediate_result_pruning.h +++ b/src/include/distributed/intermediate_result_pruning.h @@ -13,18 +13,6 @@ #include "distributed/subplan_execution.h" -/* - * UINT32_MAX is reserved in pg_dist_node, so we can use it safely. - */ -#define LOCAL_NODE_ID UINT32_MAX - -/* - * If you want to connect to the current node use `LocalHostName`, which is a GUC, instead - * of the hardcoded loopback hostname. Only if you really need the loopback hostname use - * this define. - */ -#define LOCAL_HOST_NAME "localhost" - extern bool LogIntermediateResults; extern List * FindSubPlanUsages(DistributedPlan *plan); diff --git a/src/include/distributed/local_executor.h b/src/include/distributed/local_executor.h index c555f1f82..2248e3f58 100644 --- a/src/include/distributed/local_executor.h +++ b/src/include/distributed/local_executor.h @@ -14,7 +14,6 @@ #include "distributed/citus_custom_scan.h" #include "distributed/tuple_destination.h" - /* enabled with GUCs*/ extern bool EnableLocalExecution; extern bool LogLocalCommands; diff --git a/src/include/distributed/merge_planner.h b/src/include/distributed/merge_planner.h new file mode 100644 index 000000000..158f26861 --- /dev/null +++ b/src/include/distributed/merge_planner.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * merge_planner.h + * + * Declarations for public functions and types related to router planning. + * + * Copyright (c) Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#ifndef MERGE_PLANNER_H +#define MERGE_PLANNER_H + +#include "c.h" + +#include "nodes/parsenodes.h" +#include "distributed/distributed_planner.h" +#include "distributed/errormessage.h" +#include "distributed/multi_physical_planner.h" + +extern bool IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte); +extern DeferredErrorMessage * MergeQuerySupported(Query *originalQuery, + bool multiShardQuery, + PlannerRestrictionContext * + plannerRestrictionContext); +extern DistributedPlan * CreateMergePlan(Query *originalQuery, Query *query, + PlannerRestrictionContext * + plannerRestrictionContext); + +#endif /* MERGE_PLANNER_H */ diff --git a/src/include/distributed/metadata/dependency.h b/src/include/distributed/metadata/dependency.h index c5a65319e..2d3759e1f 100644 --- a/src/include/distributed/metadata/dependency.h +++ b/src/include/distributed/metadata/dependency.h @@ -19,6 +19,8 @@ #include "distributed/errormessage.h" #include "nodes/pg_list.h" +typedef bool (*AddressPredicate)(const ObjectAddress *); + extern List * GetUniqueDependenciesList(List *objectAddressesList); extern List * GetDependenciesForObject(const ObjectAddress *target); extern List * GetAllSupportedDependenciesForObject(const ObjectAddress *target); @@ -33,5 +35,7 @@ extern List * GetPgDependTuplesForDependingObjects(Oid targetObjectClassId, Oid targetObjectId); extern List * GetDependingViews(Oid relationId); extern Oid GetDependingView(Form_pg_depend pg_depend); +extern List * FilterObjectAddressListByPredicate(List *objectAddressList, + AddressPredicate predicate); #endif /* CITUS_DEPENDENCY_H */ diff --git a/src/include/distributed/metadata_cache.h b/src/include/distributed/metadata_cache.h index 07fa50e64..c23a047ec 100644 --- a/src/include/distributed/metadata_cache.h +++ b/src/include/distributed/metadata_cache.h @@ -133,9 +133,6 @@ typedef enum REFERENCE_TABLE, CITUS_LOCAL_TABLE, - /* table without a dist key such as reference table */ - CITUS_TABLE_WITH_NO_DIST_KEY, - ANY_CITUS_TABLE_TYPE } CitusTableType; @@ -143,18 +140,23 @@ extern List * AllCitusTableIds(void); extern bool IsCitusTableType(Oid relationId, CitusTableType tableType); extern bool IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEtnry, CitusTableType tableType); +bool HasDistributionKey(Oid relationId); +bool HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry); extern char * GetTableTypeName(Oid tableId); extern void SetCreateCitusTransactionLevel(int val); extern int GetCitusCreationLevel(void); extern bool IsCitusTable(Oid relationId); +extern bool IsCitusTableRangeVar(RangeVar *rangeVar, LOCKMODE lockMode, bool missingOk); extern bool IsCitusTableViaCatalog(Oid relationId); extern char PgDistPartitionViaCatalog(Oid relationId); extern List * LookupDistShardTuples(Oid relationId); extern char PartitionMethodViaCatalog(Oid relationId); extern Var * PartitionColumnViaCatalog(Oid relationId); extern uint32 ColocationIdViaCatalog(Oid relationId); -extern bool IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel); +bool IsReferenceTableByDistParams(char partitionMethod, char replicationModel); +extern bool IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel, + uint32 colocationId); extern List * CitusTableList(void); extern ShardInterval * LoadShardInterval(uint64 shardId); extern bool ShardExists(uint64 shardId); diff --git a/src/include/distributed/metadata_sync.h 
b/src/include/distributed/metadata_sync.h index e06b5268f..d5878ec71 100644 --- a/src/include/distributed/metadata_sync.h +++ b/src/include/distributed/metadata_sync.h @@ -18,9 +18,31 @@ #include "distributed/metadata_cache.h" #include "nodes/pg_list.h" +/* managed via guc.c */ +typedef enum +{ + METADATA_SYNC_TRANSACTIONAL = 0, + METADATA_SYNC_NON_TRANSACTIONAL = 1 +} MetadataSyncTransactionMode; + /* config variables */ extern int MetadataSyncInterval; extern int MetadataSyncRetryInterval; +extern int MetadataSyncTransMode; + +/* + * MetadataSyncContext is used throughout metadata sync. + */ +typedef struct MetadataSyncContext +{ + List *activatedWorkerNodeList; /* activated worker nodes */ + List *activatedWorkerBareConnections; /* bare connections to activated nodes */ + MemoryContext context; /* memory context for all allocations */ + MetadataSyncTransactionMode transactionMode; /* transaction mode for the sync */ + bool collectCommands; /* if we collect commands instead of sending and resetting */ + List *collectedCommands; /* collected commands. (NIL if collectCommands == false) */ + bool nodesAddedInSameTransaction; /* if the nodes are added just before activation */ +} MetadataSyncContext; typedef enum { @@ -52,7 +74,6 @@ extern void citus_internal_add_placement_metadata_internal(int64 shardId, int64 shardLength, int32 groupId, int64 placementId); -extern void SyncNodeMetadataToNode(const char *nodeNameString, int32 nodePort); extern void SyncCitusTableMetadata(Oid relationId); extern void EnsureSequentialModeMetadataOperations(void); extern bool ClusterHasKnownMetadataWorkers(void); @@ -60,10 +81,10 @@ extern char * LocalGroupIdUpdateCommand(int32 groupId); extern bool ShouldSyncUserCommandForObject(ObjectAddress objectAddress); extern bool ShouldSyncTableMetadata(Oid relationId); extern bool ShouldSyncTableMetadataViaCatalog(Oid relationId); +extern Oid FetchRelationIdFromPgPartitionHeapTuple(HeapTuple heapTuple, + TupleDesc tupleDesc); extern bool ShouldSyncSequenceMetadata(Oid relationId); extern List * NodeMetadataCreateCommands(void); -extern List * DistributedObjectMetadataSyncCommandList(void); -extern List * ColocationGroupCreateCommandList(void); extern List * CitusTableMetadataCreateCommandList(Oid relationId); extern List * NodeMetadataDropCommands(void); extern char * MarkObjectsDistributedCreateCommand(List *addresses, @@ -76,6 +97,7 @@ extern char * DistributionDeleteCommand(const char *schemaName, extern char * DistributionDeleteMetadataCommand(Oid relationId); extern char * TableOwnerResetCommand(Oid distributedRelationId); extern char * NodeListInsertCommand(List *workerNodeList); +char * NodeListIdempotentInsertCommand(List *workerNodeList); extern List * ShardListInsertCommand(List *shardIntervalList); extern List * ShardDeleteCommandList(ShardInterval *shardInterval); extern char * NodeDeleteCommand(uint32 nodeId); @@ -101,11 +123,12 @@ extern void SyncNodeMetadataToNodesMain(Datum main_arg); extern void SignalMetadataSyncDaemon(Oid database, int sig); extern bool ShouldInitiateMetadataSync(bool *lockFailure); extern List * SequenceDependencyCommandList(Oid relationId); +extern List * IdentitySequenceDependencyCommandList(Oid targetRelationId); extern List * DDLCommandsForSequence(Oid sequenceOid, char *ownerName); extern List * GetSequencesFromAttrDef(Oid attrdefOid); extern void GetDependentSequencesWithRelation(Oid relationId, List **seqInfoList, - AttrNumber attnum); + AttrNumber attnum, char depType); extern List * GetDependentFunctionsWithRelation(Oid 
relationId); extern Oid GetAttributeTypeOid(Oid relationId, AttrNumber attnum); extern void SetLocalEnableMetadataSync(bool state); @@ -115,14 +138,46 @@ extern void SyncNewColocationGroupToNodes(uint32 colocationId, int shardCount, Oid distributionColumnCollation); extern void SyncDeleteColocationGroupToNodes(uint32 colocationId); +extern MetadataSyncContext * CreateMetadataSyncContext(List *nodeList, + bool collectCommands, + bool nodesAddedInSameTransaction); +extern void EstablishAndSetMetadataSyncBareConnections(MetadataSyncContext *context); +extern void SetMetadataSyncNodesFromNodeList(MetadataSyncContext *context, + List *nodeList); +extern void ResetMetadataSyncMemoryContext(MetadataSyncContext *context); +extern bool MetadataSyncCollectsCommands(MetadataSyncContext *context); +extern void SendOrCollectCommandListToActivatedNodes(MetadataSyncContext *context, + List *commands); +extern void SendOrCollectCommandListToMetadataNodes(MetadataSyncContext *context, + List *commands); +extern void SendOrCollectCommandListToSingleNode(MetadataSyncContext *context, + List *commands, int nodeIdx); + +extern void ActivateNodeList(MetadataSyncContext *context); + +extern char * WorkerDropAllShellTablesCommand(bool singleTransaction); + +extern void SyncDistributedObjects(MetadataSyncContext *context); +extern void SendNodeWideObjectsSyncCommands(MetadataSyncContext *context); +extern void SendShellTableDeletionCommands(MetadataSyncContext *context); +extern void SendMetadataDeletionCommands(MetadataSyncContext *context); +extern void SendColocationMetadataCommands(MetadataSyncContext *context); +extern void SendDependencyCreationCommands(MetadataSyncContext *context); +extern void SendDistTableMetadataCommands(MetadataSyncContext *context); +extern void SendDistObjectCommands(MetadataSyncContext *context); +extern void SendInterTableRelationshipCommands(MetadataSyncContext *context); + #define DELETE_ALL_NODES "DELETE FROM pg_dist_node" #define DELETE_ALL_PLACEMENTS "DELETE FROM pg_dist_placement" #define DELETE_ALL_SHARDS "DELETE FROM pg_dist_shard" #define DELETE_ALL_DISTRIBUTED_OBJECTS "DELETE FROM pg_catalog.pg_dist_object" #define DELETE_ALL_PARTITIONS "DELETE FROM pg_dist_partition" #define DELETE_ALL_COLOCATION "DELETE FROM pg_catalog.pg_dist_colocation" -#define REMOVE_ALL_SHELL_TABLES_COMMAND \ - "SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition" +#define WORKER_DROP_ALL_SHELL_TABLES \ + "CALL pg_catalog.worker_drop_all_shell_tables(%s)" +#define CITUS_INTERNAL_MARK_NODE_NOT_SYNCED \ + "SELECT citus_internal_mark_node_not_synced(%d, %d)" + #define REMOVE_ALL_CITUS_TABLES_COMMAND \ "SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition" #define BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND \ @@ -146,6 +201,8 @@ extern void SyncDeleteColocationGroupToNodes(uint32 colocationId); "placementid = EXCLUDED.placementid" #define METADATA_SYNC_CHANNEL "metadata_sync" +#define WORKER_ADJUST_IDENTITY_COLUMN_SEQ_RANGES \ + "SELECT pg_catalog.worker_adjust_identity_column_seq_ranges(%s)" /* controlled via GUC */ extern char *EnableManualMetadataChangesForUser; diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index ceea51678..f7b2038ee 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -299,6 +299,9 @@ extern WorkerNode * ActiveShardPlacementWorkerNode(uint64 shardId); extern List * BuildShardPlacementList(int64 shardId); 
extern List * AllShardPlacementsOnNodeGroup(int32 groupId); extern List * GroupShardPlacementsForTableOnGroup(Oid relationId, int32 groupId); +extern void LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName, + int *nodePort); +extern bool IsDummyPlacement(ShardPlacement *taskPlacement); extern StringInfo GenerateSizeQueryOnMultiplePlacements(List *shardIntervalList, SizeQueryType sizeQueryType, bool optimizePartitionCalculations); @@ -325,6 +328,7 @@ extern void DeleteShardPlacementRow(uint64 placementId); extern void CreateDistributedTable(Oid relationId, char *distributionColumnName, char distributionMethod, int shardCount, bool shardCountIsStrict, char *colocateWithTableName); +extern void CreateReferenceTable(Oid relationId); extern void CreateTruncateTrigger(Oid relationId); extern TableConversionReturn * UndistributeTable(TableConversionParameters *params); @@ -337,7 +341,6 @@ extern List * GetAllDependencyCreateDDLCommands(const List *dependencies); extern bool ShouldPropagate(void); extern bool ShouldPropagateCreateInCoordinatedTransction(void); extern bool ShouldPropagateAnyObject(List *addresses); -extern List * ReplicateAllObjectsToNodeCommandList(const char *nodeName, int nodePort); /* Remaining metadata utility functions */ extern Oid TableOwnerOid(Oid relationId); diff --git a/src/include/distributed/multi_executor.h b/src/include/distributed/multi_executor.h index b9f272d0a..4e7f13601 100644 --- a/src/include/distributed/multi_executor.h +++ b/src/include/distributed/multi_executor.h @@ -144,13 +144,6 @@ extern void SetLocalMultiShardModifyModeToSequential(void); extern void EnsureSequentialMode(ObjectType objType); extern void SetLocalForceMaxQueryParallelization(void); extern void SortTupleStore(CitusScanState *scanState); -extern bool DistributedPlanModifiesDatabase(DistributedPlan *plan); -extern bool ReadOnlyTask(TaskType taskType); -extern bool TaskListCannotBeExecutedInTransaction(List *taskList); -extern void ExtractParametersFromParamList(ParamListInfo paramListInfo, - Oid **parameterTypes, - const char ***parameterValues, bool - useOriginalCustomTypeOids); extern ParamListInfo ExecutorBoundParams(void); extern void EnsureTaskExecutionAllowed(bool isRemote); diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index 920541e97..d6ad4c248 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -553,6 +553,7 @@ extern bool BinaryOpExpression(Expr *clause, Node **leftOperand, Node **rightOpe /* helper functions */ extern Var * MakeInt4Column(void); extern int CompareShardPlacements(const void *leftElement, const void *rightElement); +extern int CompareGroupShardPlacements(const void *leftElement, const void *rightElement); extern bool ShardIntervalsOverlap(ShardInterval *firstInterval, ShardInterval *secondInterval); extern bool ShardIntervalsOverlapWithParams(Datum firstMin, Datum firstMax, diff --git a/src/include/distributed/multi_router_planner.h b/src/include/distributed/multi_router_planner.h index 62d698b51..200c498ef 100644 --- a/src/include/distributed/multi_router_planner.h +++ b/src/include/distributed/multi_router_planner.h @@ -28,6 +28,8 @@ extern bool EnableRouterExecution; extern bool EnableFastPathRouterPlanner; +extern bool EnableNonColocatedRouterQueryPushdown; + extern DistributedPlan * CreateRouterPlan(Query *originalQuery, Query *query, PlannerRestrictionContext * plannerRestrictionContext); @@ -99,6 
+101,21 @@ extern PlannedStmt * FastPathPlanner(Query *originalQuery, Query *parse, ParamLi boundParams); extern bool FastPathRouterQuery(Query *query, Node **distributionKeyValue); extern bool JoinConditionIsOnFalse(List *relOptInfo); - +extern Oid ResultRelationOidForQuery(Query *query); +extern DeferredErrorMessage * TargetlistAndFunctionsSupported(Oid resultRelationId, + FromExpr *joinTree, + Node *quals, + List *targetList, + CmdType commandType, + List *returningList); +extern bool NodeIsFieldStore(Node *node); +extern bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, + FromExpr *joinTree); +extern bool MasterIrreducibleExpression(Node *expression, bool *varArgument, + bool *badCoalesce); +extern bool HasDangerousJoinUsing(List *rtableList, Node *jtnode); +extern Job * RouterJob(Query *originalQuery, + PlannerRestrictionContext *plannerRestrictionContext, + DeferredErrorMessage **planningError); #endif /* MULTI_ROUTER_PLANNER_H */ diff --git a/src/include/distributed/reference_table_utils.h b/src/include/distributed/reference_table_utils.h index ce2de9d9d..cf5a6fd02 100644 --- a/src/include/distributed/reference_table_utils.h +++ b/src/include/distributed/reference_table_utils.h @@ -17,14 +17,20 @@ #include "listutils.h" #include "distributed/metadata_cache.h" +#include "distributed/metadata_sync.h" extern void EnsureReferenceTablesExistOnAllNodes(void); extern void EnsureReferenceTablesExistOnAllNodesExtended(char transferMode); extern bool HasNodesWithMissingReferenceTables(List **referenceTableList); extern uint32 CreateReferenceTableColocationId(void); extern uint32 GetReferenceTableColocationId(void); +extern List * GetAllReplicatedTableList(void); +extern List * ReplicatedPlacementsForNodeGroup(int32 groupId); +extern char * DeleteShardPlacementCommand(uint64 placementId); extern void DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly); +extern void DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext( + MetadataSyncContext *context, int32 groupId, bool localOnly); extern int CompareOids(const void *leftElement, const void *rightElement); extern void ReplicateAllReferenceTablesToNode(WorkerNode *workerNode); extern void ErrorIfNotAllNodesHaveReferenceTableReplicas(List *workerNodeList); diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index ccd50a6db..42b2b801f 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -54,4 +54,6 @@ extern RelationRestrictionContext * FilterRelationRestrictionContext( RelationRestrictionContext *relationRestrictionContext, Relids queryRteIdentities); +extern bool AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList); +extern bool AllDistributedRelationsInListColocated(List *relationList); #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ diff --git a/src/include/distributed/remote_commands.h b/src/include/distributed/remote_commands.h index f903ebe66..71cb9dad2 100644 --- a/src/include/distributed/remote_commands.h +++ b/src/include/distributed/remote_commands.h @@ -48,6 +48,8 @@ extern void ExecuteCriticalRemoteCommand(MultiConnection *connection, const char *command); extern void ExecuteRemoteCommandInConnectionList(List *nodeConnectionList, const char *command); +extern bool ExecuteRemoteCommandAndCheckResult(MultiConnection *connection, + char *command, char *expected); extern int 
ExecuteOptionalRemoteCommand(MultiConnection *connection, const char *command, PGresult **result); diff --git a/src/include/distributed/replication_origin_session_utils.h b/src/include/distributed/replication_origin_session_utils.h new file mode 100644 index 000000000..e90bd8ab8 --- /dev/null +++ b/src/include/distributed/replication_origin_session_utils.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * replication_origin_utils.h + * Utilities related to replication origin. + * + * Copyright (c) Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#ifndef REPLICATION_ORIGIN_SESSION_UTILS_H +#define REPLICATION_ORIGIN_SESSION_UTILS_H + +#include "postgres.h" +#include "replication/origin.h" +#include "distributed/connection_management.h" + +extern void InitializeReplicationOriginSessionUtils(void); + +extern void SetupReplicationOriginRemoteSession(MultiConnection *connection); +extern void ResetReplicationOriginRemoteSession(MultiConnection *connection); + +extern void SetupReplicationOriginLocalSession(void); +extern void ResetReplicationOriginLocalSession(void); +extern void ResetReplicationOriginLocalSessionCallbackHandler(void *arg); + + +extern bool EnableChangeDataCapture; + + +#endif /* REPLICATION_ORIGIN_SESSION_UTILS_H */ diff --git a/src/include/distributed/resource_lock.h b/src/include/distributed/resource_lock.h index 9e143e467..9efa1b767 100644 --- a/src/include/distributed/resource_lock.h +++ b/src/include/distributed/resource_lock.h @@ -53,8 +53,7 @@ typedef enum CitusOperations CITUS_NONBLOCKING_SPLIT = 1, CITUS_CREATE_DISTRIBUTED_TABLE_CONCURRENTLY = 2, CITUS_CREATE_COLOCATION_DEFAULT = 3, - CITUS_SHARD_MOVE = 4, - CITUS_BACKGROUND_TASK_MONITOR = 5 + CITUS_BACKGROUND_TASK_MONITOR = 4 } CitusOperations; /* reuse advisory lock, but with different, unused field 4 (4)*/ @@ -165,8 +164,6 @@ enum DistLockConfigs /* Lock shard/relation metadata for safe modifications */ extern void LockShardDistributionMetadata(int64 shardId, LOCKMODE lockMode); -extern void LockPlacementCleanup(void); -extern bool TryLockPlacementCleanup(void); extern void EnsureShardOwner(uint64 shardId, bool missingOk); extern void LockShardListMetadataOnWorkers(LOCKMODE lockmode, List *shardIntervalList); extern void BlockWritesToShardList(List *shardList); diff --git a/src/include/distributed/shard_transfer.h b/src/include/distributed/shard_transfer.h index ff2eb2809..a37e5abdb 100644 --- a/src/include/distributed/shard_transfer.h +++ b/src/include/distributed/shard_transfer.h @@ -12,11 +12,17 @@ #include "distributed/shard_rebalancer.h" #include "nodes/pg_list.h" -extern void citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName, - int32 sourceNodePort, - char *targetNodeName, - int32 targetNodePort, - Oid shardReplicationModeOid); +typedef enum +{ + SHARD_TRANSFER_INVALID_FIRST = 0, + SHARD_TRANSFER_MOVE = 1, + SHARD_TRANSFER_COPY = 2 +} ShardTransferType; + +extern void TransferShards(int64 shardId, + char *sourceNodeName, int32 sourceNodePort, + char *targetNodeName, int32 targetNodePort, + char shardReplicationMode, ShardTransferType transferType); extern uint64 ShardListSizeInBytes(List *colocatedShardList, char *workerNodeName, uint32 workerNodePort); extern void ErrorIfMoveUnsupportedTableType(Oid relationId); diff --git a/src/include/distributed/worker_create_or_replace.h b/src/include/distributed/worker_create_or_replace.h index 148cee138..f0b1e8077 100644 --- 
a/src/include/distributed/worker_create_or_replace.h +++ b/src/include/distributed/worker_create_or_replace.h @@ -21,6 +21,7 @@ extern char * WrapCreateOrReplace(const char *sql); extern char * WrapCreateOrReplaceList(List *sqls); extern char * GenerateBackupNameForCollision(const ObjectAddress *address); +extern DropStmt * CreateDropStmt(const ObjectAddress *address); extern RenameStmt * CreateRenameStatement(const ObjectAddress *address, char *newName); #endif /* WORKER_CREATE_OR_REPLACE_H */ diff --git a/src/include/distributed/worker_manager.h b/src/include/distributed/worker_manager.h index bb7abf183..5ad7f4962 100644 --- a/src/include/distributed/worker_manager.h +++ b/src/include/distributed/worker_manager.h @@ -62,9 +62,6 @@ extern int MaxWorkerNodesTracked; extern char *WorkerListFileName; extern char *CurrentCluster; -extern void ActivateNodeList(List *nodeList); -extern int ActivateNode(char *nodeName, int nodePort); - /* Function declarations for finding worker nodes to place shards on */ extern WorkerNode * WorkerGetRandomCandidateNode(List *currentNodeList); extern WorkerNode * WorkerGetRoundRobinCandidateNode(List *workerNodeList, @@ -87,6 +84,7 @@ extern WorkerNode * FindWorkerNode(const char *nodeName, int32 nodePort); extern WorkerNode * FindWorkerNodeOrError(const char *nodeName, int32 nodePort); extern WorkerNode * FindWorkerNodeAnyCluster(const char *nodeName, int32 nodePort); extern WorkerNode * FindNodeWithNodeId(int nodeId, bool missingOk); +extern WorkerNode * ModifiableWorkerNode(const char *nodeName, int32 nodePort); extern List * ReadDistNode(bool includeNodesFromOtherClusters); extern void EnsureCoordinator(void); extern void EnsureCoordinatorIsInMetadata(void); @@ -105,8 +103,6 @@ extern WorkerNode * SetWorkerColumnLocalOnly(WorkerNode *workerNode, int columnI Datum value); extern uint32 CountPrimariesWithMetadata(void); extern WorkerNode * GetFirstPrimaryWorkerNode(void); -extern List * SyncDistributedObjectsCommandList(WorkerNode *workerNode); -extern List * PgDistTableMetadataSyncCommandList(void); /* Function declarations for worker node utilities */ extern int CompareWorkerNodes(const void *leftElement, const void *rightElement); diff --git a/src/include/distributed/worker_shard_copy.h b/src/include/distributed/worker_shard_copy.h index 2ab2775f9..77f57c761 100644 --- a/src/include/distributed/worker_shard_copy.h +++ b/src/include/distributed/worker_shard_copy.h @@ -19,4 +19,9 @@ extern DestReceiver * CreateShardCopyDestReceiver(EState *executorState, List *destinationShardFullyQualifiedName, uint32_t destinationNodeId); +extern const char * CopyableColumnNamesFromRelationName(const char *schemaName, const + char *relationName); + +extern const char * CopyableColumnNamesFromTupleDesc(TupleDesc tupdesc); + #endif /* WORKER_SHARD_COPY_H_ */ diff --git a/src/include/distributed/worker_transaction.h b/src/include/distributed/worker_transaction.h index aa137b76b..be8fe5ed6 100644 --- a/src/include/distributed/worker_transaction.h +++ b/src/include/distributed/worker_transaction.h @@ -82,6 +82,8 @@ extern void SendCommandListToWorkerOutsideTransaction(const char *nodeName, extern void SendCommandListToWorkerOutsideTransactionWithConnection( MultiConnection *workerConnection, List *commandList); +extern void SendCommandListToWorkerListWithBareConnections(List *workerConnections, + List *commandList); extern void SendMetadataCommandListToWorkerListInCoordinatedTransaction( List *workerNodeList, const char * diff --git a/src/test/columnar_freezing/Makefile 
b/src/test/cdc/Makefile similarity index 62% rename from src/test/columnar_freezing/Makefile rename to src/test/cdc/Makefile index cd364cdbc..d67fe5499 100644 --- a/src/test/columnar_freezing/Makefile +++ b/src/test/cdc/Makefile @@ -1,33 +1,22 @@ #------------------------------------------------------------------------- # -# Makefile for src/test/columnar_freezing +# Makefile for src/test/cdc # -# Test that columnar freezing works. +# Test that CDC publication works correctly. # #------------------------------------------------------------------------- -subdir = src/test/columnar_freezing +subdir = src/test/cdc top_builddir = ../../.. include $(top_builddir)/Makefile.global -# In PG15, Perl test modules have been moved to a new namespace -# new() and get_new_node() methods have been unified to 1 method: new() -# Relevant PG commits 201a76183e2056c2217129e12d68c25ec9c559c8 -# b3b4d8e68ae83f432f43f035c7eb481ef93e1583 pg_version = $(shell $(PG_CONFIG) --version 2>/dev/null) pg_whole_version = $(shell echo "$(pg_version)"| sed -e 's/^PostgreSQL \([0-9]*\)\(\.[0-9]*\)\{0,1\}\(.*\)/\1\2/') pg_major_version = $(shell echo "$(pg_whole_version)"| sed -e 's/^\([0-9]\{2\}\)\(.*\)/\1/') +export pg_major_version + +test_path = t/*.pl -# for now, we only have a single test file -# due to the above explanation, we ended up separating the test paths for -# different versions. If you need to add new test files, be careful to add both versions -ifeq ($(pg_major_version),13) - test_path = t_pg13_pg14/*.pl -else ifeq ($(pg_major_version),14) - test_path = t_pg13_pg14/*.pl -else - test_path = t/*.pl -endif # copied from pgxs/Makefile.global to use postgres' abs build dir for pg_regress ifeq ($(enable_tap_tests),yes) diff --git a/src/test/cdc/postgresql.conf b/src/test/cdc/postgresql.conf new file mode 100644 index 000000000..1c0e1fad7 --- /dev/null +++ b/src/test/cdc/postgresql.conf @@ -0,0 +1 @@ +shared_preload_libraries='citus' diff --git a/src/test/cdc/t/001_cdc_create_distributed_table_test.pl b/src/test/cdc/t/001_cdc_create_distributed_table_test.pl new file mode 100644 index 000000000..5e57b8a54 --- /dev/null +++ b/src/test/cdc/t/001_cdc_create_distributed_table_test.pl @@ -0,0 +1,109 @@ +# Basic CDC test for create_distributed_table +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; + +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +# Create the sensors table and ndexes. 
+my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_coordinator->safe_psql('postgres','ALTER TABLE sensors REPLICA IDENTITY FULL;'); + +$node_cdc_client->safe_psql('postgres',$initial_schema); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +create_cdc_slots_for_workers(\@workers); + +# Distribut the sensors table to worker nodes. +$node_coordinator->safe_psql('postgres',"SELECT create_distributed_table('sensors', 'measureid');"); + +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Insert some data to the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' + FROM generate_series(0,10)i;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC basic test - distributed table insert data'); + + +# Update some data in the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," +UPDATE sensors + SET + eventdatetime=NOW(), + measure_data = jsonb_set(measure_data, '{val}', measureid::text::jsonb , TRUE), + measure_status = CASE + WHEN measureid % 2 = 0 + THEN 'y' + ELSE 'n' + END, + measure_comment= 'Comment:' || measureid::text;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC basic test - distributed table update data'); + +# Delete some data from the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," +DELETE FROM sensors + WHERE (measureid % 2) = 0;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC basic test - distributed table delete data'); + +$node_coordinator->safe_psql('postgres',"TRUNCATE sensors;"); + +# Wait for the data changes to be replicated to the cdc client node. 
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC basic test - distributed table truncate data'); + + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/002_cdc_create_distributed_table_concurrently.pl b/src/test/cdc/t/002_cdc_create_distributed_table_concurrently.pl new file mode 100644 index 000000000..511ec1672 --- /dev/null +++ b/src/test/cdc/t/002_cdc_create_distributed_table_concurrently.pl @@ -0,0 +1,99 @@ +# CDC test for create_distributed_table_concurrently +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; + +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +# Create the sensors table and indexes. +my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_cdc_client->safe_psql('postgres',$initial_schema); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +create_cdc_slots_for_workers(\@workers); + +# Distribute the sensors table to the worker nodes. +$node_coordinator->safe_psql('postgres',"SELECT create_distributed_table_concurrently('sensors', 'measureid');"); + +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Insert some data into the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' + FROM generate_series(0,10)i;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC test - create_distributed_table_concurrently insert data'); + + +# Update some data in the sensors table in the coordinator node.
+$node_coordinator->safe_psql('postgres'," +UPDATE sensors + SET + eventdatetime=NOW(), + measure_data = jsonb_set(measure_data, '{val}', measureid::text::jsonb , TRUE), + measure_status = CASE + WHEN measureid % 2 = 0 + THEN 'y' + ELSE 'n' + END, + measure_comment= 'Comment:' || measureid::text;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC test - create_distributed_table_concurrently update data'); + +# Delete some data from the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," +DELETE FROM sensors + WHERE (measureid % 2) = 0;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC test - create_distributed_table_concurrently delete data'); + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/003_cdc_parallel_insert.pl b/src/test/cdc/t/003_cdc_parallel_insert.pl new file mode 100644 index 000000000..ecc53ffac --- /dev/null +++ b/src/test/cdc/t/003_cdc_parallel_insert.pl @@ -0,0 +1,83 @@ +# CDC test for inserts during create distributed table concurrently +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + +use threads; + + +# Initialize co-ordinator node +our $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $add_local_meta_data_stmt = qq(SELECT citus_add_local_table_to_metadata('sensors');); +my $result = 0; + +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639, ""); + +# Create the sensors table and indexes. +my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_coordinator->safe_psql('postgres',$add_local_meta_data_stmt); +$node_cdc_client->safe_psql('postgres',$initial_schema); + + +create_cdc_publication_and_replication_slots_for_citus_cluster($node_coordinator,\@workers,'sensors'); +connect_cdc_client_to_citus_cluster_publications($node_coordinator,\@workers,$node_cdc_client); + +# Insert data into the sensors table in the coordinator node before distributing the table.
+$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors +SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' +FROM generate_series(0,10)i;"); + +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator,\@workers); + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC create_distributed_table - insert data'); + +sub create_distributed_table_thread() { + $node_coordinator->safe_psql('postgres',"SELECT create_distributed_table_concurrently('sensors', 'measureid');"); +} + +sub insert_data_into_distributed_table_thread() { + # Insert some data into the sensors table in the coordinator node. + $node_coordinator->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' + FROM generate_series(-10,-1)i;"); +} + +# Create the distributed table concurrently in a separate thread. +my $thr_create = threads->create(\&create_distributed_table_thread); +my $thr_insert = threads->create(\&insert_data_into_distributed_table_thread); +$thr_create->join(); +$thr_insert->join(); + +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator,\@workers); +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC create_distributed_table - parallel insert data'); + + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/004_cdc_move_shard.pl b/src/test/cdc/t/004_cdc_move_shard.pl new file mode 100644 index 000000000..3b76c15f5 --- /dev/null +++ b/src/test/cdc/t/004_cdc_move_shard.pl @@ -0,0 +1,93 @@ +# Shard move CDC test for Citus +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; + +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +print("coordinator port: " . $node_coordinator->port() . "\n"); +print("worker0 port:" . $workers[0]->port() . "\n"); +print("worker1 port:" . $workers[1]->port() . "\n"); +print("cdc_client port:" .$node_cdc_client->port() . "\n"); + +# Create the sensors table and indexes.
+my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_cdc_client->safe_psql('postgres',$initial_schema); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +create_cdc_slots_for_workers(\@workers); + +#insert data into the sensors table in the coordinator node before distributing the table. +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors +SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' +FROM generate_series(0,100)i;"); + +$node_coordinator->safe_psql('postgres',"SET citus.shard_count = 2; SELECT create_distributed_table_concurrently('sensors', 'measureid');"); + +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC create_distributed_table - schema change before move'); + + + +my $shard_to_move = $node_coordinator->safe_psql('postgres', + "SELECT shardid FROM citus_shards ORDER BY shardid LIMIT 1;"); +my $host1 = $node_coordinator->safe_psql('postgres', + "SELECT nodename FROM citus_shards ORDER BY shardid LIMIT 1;"); +my $port1 = $node_coordinator->safe_psql('postgres', + "SELECT nodeport FROM citus_shards ORDER BY shardid LIMIT 1;"); + +my $shard_last = $node_coordinator->safe_psql('postgres', + "SELECT shardid FROM citus_shards ORDER BY shardid DESC LIMIT 1;"); +my $host2 = $node_coordinator->safe_psql('postgres', + "SELECT nodename FROM citus_shards ORDER BY shardid DESC LIMIT 1;"); +my $port2 = $node_coordinator->safe_psql('postgres', + "SELECT nodeport FROM citus_shards ORDER BY shardid DESC LIMIT 1;"); + +my $move_params = "select citus_move_shard_placement($shard_to_move,'$host1',$port1,'$host2',$port2,'force_logical');"; +print("move_params: $move_params\n"); +$node_coordinator->safe_psql('postgres',$move_params); + +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator,\@workers); + + +#wait_for_cdc_client_to_catch_up_with_workers(\@workers); +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC create_distributed_table - schema change and move shard'); + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/005_cdc_reference_table_test.pl b/src/test/cdc/t/005_cdc_reference_table_test.pl new file mode 100644 index 000000000..d41fbc909 --- /dev/null +++ b/src/test/cdc/t/005_cdc_reference_table_test.pl @@ -0,0 +1,52 @@ +# Basic CDC test for create_distributed_table +use strict; 
+use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; +my $ref_select_stmt = qq(SELECT * FROM reference_table ORDER BY measureid;); + +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +$node_coordinator->safe_psql('postgres',"CREATE TABLE reference_table(measureid integer PRIMARY KEY);"); +$node_cdc_client->safe_psql('postgres',"CREATE TABLE reference_table(measureid integer PRIMARY KEY);"); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'reference_table'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +# Create the reference table in the coordinator and cdc client nodes. +$node_coordinator->safe_psql('postgres',"SELECT create_reference_table('reference_table');"); + +create_cdc_slots_for_workers(\@workers); +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); + +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$ref_select_stmt); +is($result, 1, 'CDC reference table test 1'); + + +# Insert data into the reference table in the coordinator node. +$node_coordinator->safe_psql('postgres',"INSERT INTO reference_table SELECT i FROM generate_series(0,100)i;"); + +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$ref_select_stmt); +is($result, 1, 'CDC reference table test 2'); + + +$node_coordinator->safe_psql('postgres',"INSERT INTO reference_table SELECT i FROM generate_series(101,200)i;"); + +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$ref_select_stmt); +is($result, 1, 'CDC reference table test 3'); + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/006_cdc_schema_change_and_move.pl b/src/test/cdc/t/006_cdc_schema_change_and_move.pl new file mode 100644 index 000000000..cf1425a30 --- /dev/null +++ b/src/test/cdc/t/006_cdc_schema_change_and_move.pl @@ -0,0 +1,128 @@ +# Schema change CDC test for Citus +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $select_stmt_after_drop = qq(SELECT measureid, eventdatetime, measure_data, measure_status, measure_comment FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; + +### Create the citus cluster with coordinator and two worker nodes
our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +print("coordinator port: " . $node_coordinator->port() . "\n"); +print("worker0 port:" . $workers[0]->port() . "\n"); +print("worker1 port:" . $workers[1]->port() . "\n"); +print("cdc_client port:" .$node_cdc_client->port() .
"\n"); + +# Creeate the sensors table and ndexes. +my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_coordinator->safe_psql('postgres','ALTER TABLE sensors REPLICA IDENTITY FULL;'); +$node_cdc_client->safe_psql('postgres',$initial_schema); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +#insert data into the sensors table in the coordinator node before distributing the table. +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors +SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' +FROM generate_series(0,100)i;"); + +$node_coordinator->safe_psql('postgres',"SET citus.shard_count = 2; SELECT create_distributed_table_concurrently('sensors', 'measureid');"); + +#connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +create_cdc_slots_for_workers(\@workers); +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC create_distributed_table - schema change before move'); + + +$node_coordinator->safe_psql('postgres',"ALTER TABLE sensors DROP COLUMN meaure_quantity;"); + + +my $shard_to_move = $node_coordinator->safe_psql('postgres', + "SELECT shardid FROM citus_shards ORDER BY shardid LIMIT 1;"); +my $host1 = $node_coordinator->safe_psql('postgres', + "SELECT nodename FROM citus_shards ORDER BY shardid LIMIT 1;"); +my $port1 = $node_coordinator->safe_psql('postgres', + "SELECT nodeport FROM citus_shards ORDER BY shardid LIMIT 1;"); + +my $shard_last = $node_coordinator->safe_psql('postgres', + "SELECT shardid FROM citus_shards ORDER BY shardid DESC LIMIT 1;"); +my $host2 = $node_coordinator->safe_psql('postgres', + "SELECT nodename FROM citus_shards ORDER BY shardid DESC LIMIT 1;"); +my $port2 = $node_coordinator->safe_psql('postgres', + "SELECT nodeport FROM citus_shards ORDER BY shardid DESC LIMIT 1;"); + +my $move_params = "select citus_move_shard_placement($shard_to_move,'$host1',$port1,'$host2',$port2,'force_logical');"; +print("move_params: $move_params\n"); +$node_coordinator->safe_psql('postgres',$move_params); + + + +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 'A', 'I <3 Citus' + FROM generate_series(-10,-1)i;"); + + +$node_cdc_client->safe_psql('postgres',"ALTER TABLE sensors DROP COLUMN meaure_quantity;"); + +wait_for_cdc_client_to_catch_up_with_workers(\@workers); +$result = 
compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC create_distributed_table - schema change and move shard');
+
+# Update some data in the sensors table to check the schema change handling logic in CDC decoder.
+$node_coordinator->safe_psql('postgres',"
+UPDATE sensors
+    SET
+    measure_status = CASE
+    WHEN measureid % 2 = 0
+    THEN 'y'
+    ELSE 'n'
+    END;");
+
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator,\@workers);
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC create_distributed_table - update data after schema change');
+
+# Delete some data from the sensors table to check the schema change handling logic in CDC decoder.
+$node_coordinator->safe_psql('postgres',"
+DELETE FROM sensors
+    WHERE
+    measure_status = 'n';");
+
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator,\@workers);
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC create_distributed_table - delete data after schema change');
+
+drop_cdc_client_subscriptions($node_cdc_client,\@workers);
+done_testing();
diff --git a/src/test/cdc/t/007_cdc_undistributed_table_test.pl b/src/test/cdc/t/007_cdc_undistributed_table_test.pl
new file mode 100644
index 000000000..f927b43e2
--- /dev/null
+++ b/src/test/cdc/t/007_cdc_undistributed_table_test.pl
@@ -0,0 +1,110 @@
+# Basic CDC test for create_distributed_table
+use strict;
+use warnings;
+
+use Test::More;
+
+use lib './t';
+use cdctestlib;
+
+# Initialize co-ordinator node
+my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;);
+my $result = 0;
+
+### Create the citus cluster with coordinator and two worker nodes
+our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636);
+
+my $command = "UPDATE pg_dist_node SET shouldhaveshards = true;";
+$node_coordinator->safe_psql('postgres',$command);
+
+our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639);
+
+# Create the sensors table and indexes.
+my $initial_schema = "
+        CREATE TABLE sensors(
+        measureid integer,
+        eventdatetime timestamptz,
+        measure_data jsonb,
+        meaure_quantity decimal(15, 2),
+        measure_status char(1),
+        measure_comment varchar(44),
+        PRIMARY KEY (measureid, eventdatetime, measure_data));
+
+        CREATE INDEX index_on_sensors ON sensors(lower(measureid::text));
+        ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000;
+        CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed'));
+        CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status);
+        CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;";
+
+$node_coordinator->safe_psql('postgres',$initial_schema);
+$node_cdc_client->safe_psql('postgres',$initial_schema);
+
+create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors');
+connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client);
+wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator);
+
+create_cdc_slots_for_workers(\@workers);
+
+# Distribute the sensors table to worker nodes. 
+$node_coordinator->safe_psql('postgres',"SELECT create_distributed_table('sensors', 'measureid');"); + +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Insert some data to the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' + FROM generate_series(0,10)i;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC basic test - distributed table insert data'); + + +# Update some data in the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," +UPDATE sensors + SET + eventdatetime=NOW(), + measure_data = jsonb_set(measure_data, '{val}', measureid::text::jsonb , TRUE), + measure_status = CASE + WHEN measureid % 2 = 0 + THEN 'y' + ELSE 'n' + END, + measure_comment= 'Comment:' || measureid::text;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC basic test - distributed table update data'); + +# Delete some data from the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," +DELETE FROM sensors + WHERE (measureid % 2) = 0;"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC basic test - distributed table delete data'); + +$node_coordinator->safe_psql('postgres'," +SELECT undistribute_table('sensors',cascade_via_foreign_keys=>true);"); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. 
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC basic test - undistribute table');
+
+drop_cdc_client_subscriptions($node_cdc_client,\@workers);
+done_testing();
diff --git a/src/test/cdc/t/008_cdc_shard_split_test.pl b/src/test/cdc/t/008_cdc_shard_split_test.pl
new file mode 100644
index 000000000..6875d1855
--- /dev/null
+++ b/src/test/cdc/t/008_cdc_shard_split_test.pl
@@ -0,0 +1,83 @@
+# Basic CDC test for create_distributed_table
+use strict;
+use warnings;
+
+use Test::More;
+
+use lib './t';
+use cdctestlib;
+
+# Initialize co-ordinator node
+my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;);
+my $result = 0;
+my $citus_config = "
+citus.shard_count = 2
+citus.shard_replication_factor = 1
+";
+### Create the citus cluster with coordinator and two worker nodes
+our ($node_coordinator, @workers) = create_citus_cluster(1,"localhost",57636, $citus_config);
+
+my $command = "UPDATE pg_dist_node SET shouldhaveshards = true;";
+$node_coordinator->safe_psql('postgres',$command);
+
+our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639);
+
+# Create the sensors table and indexes.
+my $initial_schema = "
+        CREATE TABLE sensors(
+        measureid integer,
+        eventdatetime timestamptz,
+        measure_data jsonb,
+        meaure_quantity decimal(15, 2),
+        measure_status char(1),
+        measure_comment varchar(44),
+        PRIMARY KEY (measureid, eventdatetime, measure_data));
+
+        CREATE INDEX index_on_sensors ON sensors(lower(measureid::text));
+        ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000;
+        CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed'));
+        CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status);
+        CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;";
+
+$node_coordinator->safe_psql('postgres',$initial_schema);
+$node_cdc_client->safe_psql('postgres',$initial_schema);
+
+create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors');
+connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client);
+wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator);
+
+create_cdc_slots_for_workers(\@workers);
+
+# Distribute the sensors table to worker nodes.
+
+$node_coordinator->safe_psql('postgres',"SELECT create_distributed_table('sensors', 'measureid');");
+
+connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client);
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers);
+
+# Compare the data in the coordinator and cdc client nodes.
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC split test - distributed table create data');
+
+# Insert some data to the sensors table in the coordinator node.
+$node_coordinator->safe_psql('postgres',"
+    INSERT INTO sensors
+    SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus'
+    FROM generate_series(-100,100)i;");
+
+# Compare the data in the coordinator and cdc client nodes.
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC split test - distributed table insert data');
+
+# Wait for the data changes to be replicated to the cdc client node. 
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +$node_coordinator->safe_psql('postgres'," + SELECT citus_split_shard_by_split_points(102008,ARRAY['-50'],ARRAY[1,2], 'block_writes');"); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC split test - distributed table split data'); + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/009_cdc_shard_split_test_non_blocking.pl b/src/test/cdc/t/009_cdc_shard_split_test_non_blocking.pl new file mode 100644 index 000000000..58077b5a1 --- /dev/null +++ b/src/test/cdc/t/009_cdc_shard_split_test_non_blocking.pl @@ -0,0 +1,83 @@ +# Basic CDC test for create_distributed_table +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; +my $citus_config = " +citus.shard_count = 2 +citus.shard_replication_factor = 1 +"; +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(1,"localhost",57636, $citus_config); + +my $command = "UPDATE pg_dist_node SET shouldhaveshards = true;"; +$node_coordinator->safe_psql('postgres',$command); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +# Create the sensors table and ndexes. +my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_cdc_client->safe_psql('postgres',$initial_schema); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +create_cdc_slots_for_workers(\@workers); + +# Distribut the sensors table to worker nodes. + +$node_coordinator->safe_psql('postgres',"SELECT create_distributed_table('sensors', 'measureid');"); + +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC split test - distributed table create data'); + +# Insert some data to the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' + FROM generate_series(-100,100)i;"); + +# Compare the data in the coordinator and cdc client nodes. 
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC split test - distributed table insert data'); + +# Wait for the data changes to be replicated to the cdc client node. +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +$node_coordinator->safe_psql('postgres'," + SELECT citus_split_shard_by_split_points(102008,ARRAY['-50'],ARRAY[1,2], 'force_logical');"); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC split test - distributed table split data'); + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/010_cdc_shard_split_parallel_insert.pl b/src/test/cdc/t/010_cdc_shard_split_parallel_insert.pl new file mode 100644 index 000000000..4ac75244a --- /dev/null +++ b/src/test/cdc/t/010_cdc_shard_split_parallel_insert.pl @@ -0,0 +1,106 @@ +# Basic CDC test for create_distributed_table +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + +use threads; + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; +my $citus_config = " +citus.shard_count = 2 +citus.shard_replication_factor = 1 +"; +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(1,"localhost",57636, $citus_config); + +my $command = "UPDATE pg_dist_node SET shouldhaveshards = true;"; +$node_coordinator->safe_psql('postgres',$command); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +# Create the sensors table and ndexes. +my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_cdc_client->safe_psql('postgres',$initial_schema); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +create_cdc_slots_for_workers(\@workers); + +# Distribut the sensors table to worker nodes. +$node_coordinator->safe_psql('postgres',"SELECT create_distributed_table('sensors', 'measureid');"); + +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. 
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC split test - distributed table create data');
+
+# Insert some data to the sensors table in the coordinator node.
+$node_coordinator->safe_psql('postgres',"
+    INSERT INTO sensors
+    SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus'
+    FROM generate_series(-100,100)i;");
+
+# Compare the data in the coordinator and cdc client nodes.
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC split test - distributed table insert data');
+
+
+sub insert_data_into_distributed_table_thread() {
+    # Insert some data to the sensors table in the coordinator node.
+    $node_coordinator->safe_psql('postgres',"
+    INSERT INTO sensors
+    SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus'
+    FROM generate_series(101,200)i;");
+}
+
+sub split_distributed_table_thread() {
+    $node_coordinator->safe_psql('postgres',"
+    SELECT citus_split_shard_by_split_points(102008,ARRAY['-50'],ARRAY[1,2], 'force_logical');");
+}
+
+# Split the shard in a separate thread.
+my $thr_create = threads->create(\&split_distributed_table_thread);
+
+# Insert some data to the sensors table in the coordinator node while the shard is being split.
+my $thr_insert = threads->create(\&insert_data_into_distributed_table_thread);
+
+# Wait for the threads to finish.
+$thr_create->join();
+$thr_insert->join();
+
+# Wait for the data changes to be replicated to the cdc client node.
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers);
+
+
+# Compare the data in the coordinator and cdc client nodes.
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC split test - distributed table split data');
+
+drop_cdc_client_subscriptions($node_cdc_client,\@workers);
+done_testing();
diff --git a/src/test/cdc/t/011_cdc_alter_distributed_table.pl b/src/test/cdc/t/011_cdc_alter_distributed_table.pl
new file mode 100644
index 000000000..2fbcd6429
--- /dev/null
+++ b/src/test/cdc/t/011_cdc_alter_distributed_table.pl
@@ -0,0 +1,95 @@
+# Basic CDC test for create_distributed_table
+use strict;
+use warnings;
+
+use Test::More;
+
+use lib './t';
+use cdctestlib;
+
+# Initialize co-ordinator node
+my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;);
+my $add_local_meta_data_stmt = qq(SELECT citus_add_local_table_to_metadata('sensors'););
+my $result = 0;
+my $citus_config = "
+citus.shard_count = 2
+citus.shard_replication_factor = 1
+";
+### Create the citus cluster with coordinator and two worker nodes
+our ($node_coordinator, @workers) = create_citus_cluster(1,"localhost",57636, $citus_config);
+
+my $command = "UPDATE pg_dist_node SET shouldhaveshards = true;";
+$node_coordinator->safe_psql('postgres',$command);
+
+our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639);
+
+# Create the sensors table and indexes. 
+my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_coordinator->safe_psql('postgres',$add_local_meta_data_stmt); +$node_cdc_client->safe_psql('postgres',$initial_schema); + +create_cdc_publication_and_replication_slots_for_citus_cluster($node_coordinator,\@workers,'sensors'); +connect_cdc_client_to_citus_cluster_publications($node_coordinator,\@workers,$node_cdc_client); + +# Distribut the sensors table to worker nodes. +$node_coordinator->safe_psql('postgres',"SELECT create_distributed_table_concurrently('sensors', 'measureid');"); + +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator,\@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC split test - distributed table create data'); + +# Insert some data to the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," + SELECT alter_distributed_table('sensors', shard_count:=6, cascade_to_colocated:=true);"); + +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +# Compare the data in the coordinator and cdc client nodes. +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC split test - alter distributed table '); + +#$node_cdc_client->safe_psql("postgres","alter subscription cdc_subscription refresh publication;"); +$node_cdc_client->safe_psql("postgres","alter subscription cdc_subscription_1 refresh publication;"); + + +#Drop the CDC client subscription and recreate them , since the +#alter_distributed_table has changed the Oid of the distributed table. +#So the CDC client has to create Oid to table mappings again for +#CDC to work again. +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +create_cdc_publication_and_replication_slots_for_citus_cluster($node_coordinator,\@workers,'sensors'); +connect_cdc_client_to_citus_cluster_publications($node_coordinator,\@workers,$node_cdc_client); + +# Insert some data to the sensors table in the coordinator node. +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' + FROM generate_series(0,10)i;"); + +# Wait for the data changes to be replicated to the cdc client node. 
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers);
+
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC basic test - distributed table insert data');
+
+drop_cdc_client_subscriptions($node_cdc_client,\@workers);
+
+done_testing();
diff --git a/src/test/cdc/t/012_cdc_restart_test.pl b/src/test/cdc/t/012_cdc_restart_test.pl
new file mode 100644
index 000000000..81a129c5a
--- /dev/null
+++ b/src/test/cdc/t/012_cdc_restart_test.pl
@@ -0,0 +1,87 @@
+# Basic CDC test for create_distributed_table
+use strict;
+use warnings;
+
+use Test::More;
+
+use lib './t';
+use cdctestlib;
+
+# Initialize co-ordinator node
+my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;);
+my $result = 0;
+
+### Create the citus cluster with coordinator and two worker nodes
+our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636);
+
+our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639);
+
+# Create the sensors table and indexes.
+my $initial_schema = "
+        CREATE TABLE sensors(
+        measureid integer,
+        eventdatetime timestamptz,
+        measure_data jsonb,
+        meaure_quantity decimal(15, 2),
+        measure_status char(1),
+        measure_comment varchar(44),
+        PRIMARY KEY (measureid, eventdatetime, measure_data));
+
+        CREATE INDEX index_on_sensors ON sensors(lower(measureid::text));
+        ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000;
+        CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed'));
+        CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status);
+        CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;";
+
+$node_coordinator->safe_psql('postgres',$initial_schema);
+$node_cdc_client->safe_psql('postgres',$initial_schema);
+
+create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors');
+connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client);
+wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator);
+
+create_cdc_slots_for_workers(\@workers);
+
+# Distribute the sensors table to worker nodes.
+$node_coordinator->safe_psql('postgres',"SELECT create_distributed_table('sensors', 'measureid');");
+
+connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client);
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers);
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC restart test - distributed table creation');
+
+
+# Insert some data to the sensors table in the coordinator node.
+$node_coordinator->safe_psql('postgres',"
+    INSERT INTO sensors
+    SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus'
+    FROM generate_series(0,10)i;");
+
+# Wait for the data changes to be replicated to the cdc client node.
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers);
+
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC restart test - distributed table insert data');
+
+
+print("stopping worker 0");
+$workers[0]->stop();
+print("starting worker 0 again..");
+$workers[0]->start();
+
+
+wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers);
+
+# Insert some data to the sensors table in the coordinator node. 
+$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' + FROM generate_series(11,20)i;"); + + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC restart test - distributed table after restart'); + + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/013_cdc_drop_last_column_for_one_shard.pl b/src/test/cdc/t/013_cdc_drop_last_column_for_one_shard.pl new file mode 100644 index 000000000..ec8ccb718 --- /dev/null +++ b/src/test/cdc/t/013_cdc_drop_last_column_for_one_shard.pl @@ -0,0 +1,89 @@ +# Schema change CDC test for Citus +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $select_stmt_after_drop = qq(SELECT measureid, eventdatetime, measure_data, meaure_quantity, measure_status FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; + +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +print("coordinator port: " . $node_coordinator->port() . "\n"); +print("worker0 port:" . $workers[0]->port() . "\n"); +print("worker1 port:" . $workers[1]->port() . "\n"); +print("cdc_client port:" .$node_cdc_client->port() . "\n"); + +# Creeate the sensors table and ndexes. +my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_coordinator->safe_psql('postgres','ALTER TABLE sensors REPLICA IDENTITY FULL;'); +$node_cdc_client->safe_psql('postgres',$initial_schema); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +#insert data into the sensors table in the coordinator node before distributing the table. 
+$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors +SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' +FROM generate_series(0,100)i;"); + +$node_coordinator->safe_psql('postgres',"SET citus.shard_count = 2; SELECT create_distributed_table_concurrently('sensors', 'measureid');"); + +#connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +create_cdc_slots_for_workers(\@workers); +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC create_distributed_table - schema change before move'); + + +my $shard_id = $workers[1]->safe_psql('postgres', + "SELECT shardid FROM citus_shards ORDER BY shardid LIMIT 1;"); + +my $shard_to_drop_column = "sensors_" . $shard_id; + + +$workers[1]->safe_psql('postgres',"ALTER TABLE $shard_to_drop_column DROP COLUMN measure_comment;"); + + +$workers[1]->safe_psql('postgres'," + INSERT INTO sensors + SELECT i, '2020-01-05', '{}', 11011.10, 'A' + FROM generate_series(-10,-1)i;"); + + +wait_for_cdc_client_to_catch_up_with_workers(\@workers); +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt_after_drop); +is($result, 1, 'CDC create_distributed_table - schema change and move shard'); + + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/014_cdc_with_table_like_shard_name.pl b/src/test/cdc/t/014_cdc_with_table_like_shard_name.pl new file mode 100644 index 000000000..c96fea921 --- /dev/null +++ b/src/test/cdc/t/014_cdc_with_table_like_shard_name.pl @@ -0,0 +1,90 @@ +# Schema change CDC test for Citus +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $select_stmt_after_drop = qq(SELECT measureid, eventdatetime, measure_data, meaure_quantity, measure_status FROM sensors ORDER BY measureid, eventdatetime, measure_data;); +my $result = 0; + +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + +print("coordinator port: " . $node_coordinator->port() . "\n"); +print("worker0 port:" . $workers[0]->port() . "\n"); +print("worker1 port:" . $workers[1]->port() . "\n"); +print("cdc_client port:" .$node_cdc_client->port() . "\n"); + +# Creeate the sensors table and ndexes. 
+my $initial_schema = " + CREATE TABLE sensors( + measureid integer, + eventdatetime timestamptz, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + + CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); + ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; + CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); + CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); + CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors;"; + +my $shard_like_table_schema = " + CREATE TABLE data_100008( + id integer, + data integer, + PRIMARY KEY (data));"; + +$node_coordinator->safe_psql('postgres',$initial_schema); +$node_coordinator->safe_psql('postgres',$shard_like_table_schema); +$node_coordinator->safe_psql('postgres','ALTER TABLE sensors REPLICA IDENTITY FULL;'); + +$node_cdc_client->safe_psql('postgres',$initial_schema); +$node_cdc_client->safe_psql('postgres',$shard_like_table_schema); + +create_cdc_publication_and_slots_for_coordinator($node_coordinator,'sensors,data_100008'); +connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator); + +#insert data into the sensors table in the coordinator node before distributing the table. +$node_coordinator->safe_psql('postgres'," + INSERT INTO sensors +SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' +FROM generate_series(0,100)i;"); + +$node_coordinator->safe_psql('postgres',"SET citus.shard_count = 2; SELECT create_distributed_table_concurrently('sensors', 'measureid');"); + +#connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client); +create_cdc_slots_for_workers(\@workers); +connect_cdc_client_to_workers_publication(\@workers, $node_cdc_client); +wait_for_cdc_client_to_catch_up_with_citus_cluster($node_coordinator, \@workers); + +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt); +is($result, 1, 'CDC create_distributed_table - basic test'); + +$workers[1]->safe_psql('postgres',$shard_like_table_schema); +$workers[1]->safe_psql('postgres','\d'); + +$workers[1]->safe_psql('postgres'," + INSERT INTO data_100008 + SELECT i, i*10 + FROM generate_series(-10,10)i;"); + + +wait_for_cdc_client_to_catch_up_with_workers(\@workers); +$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt_after_drop); +is($result, 1, 'CDC create_distributed_table - normal table with name like shard'); + +drop_cdc_client_subscriptions($node_cdc_client,\@workers); +done_testing(); diff --git a/src/test/cdc/t/015_cdc_without_citus.pl b/src/test/cdc/t/015_cdc_without_citus.pl new file mode 100644 index 000000000..4f3db68ca --- /dev/null +++ b/src/test/cdc/t/015_cdc_without_citus.pl @@ -0,0 +1,53 @@ +# Schema change CDC test for Citus +use strict; +use warnings; + +use Test::More; + +use lib './t'; +use cdctestlib; + + +# Initialize co-ordinator node +my $select_stmt = qq(SELECT * FROM data_100008 ORDER BY id;); +my $result = 0; + +### Create the citus cluster with coordinator and two worker nodes +our ($node_coordinator, @workers) = create_citus_cluster(2,"localhost",57636); + +our $node_cdc_client = create_node('cdc_client', 0, "localhost", 57639); + 
+print("coordinator port: " . $node_coordinator->port() . "\n");
+print("worker0 port:" . $workers[0]->port() . "\n");
+print("worker1 port:" . $workers[1]->port() . "\n");
+print("cdc_client port:" .$node_cdc_client->port() . "\n");
+
+my $initial_schema = "
+        CREATE TABLE data_100008(
+        id integer,
+        data integer,
+        PRIMARY KEY (data));";
+
+$node_coordinator->safe_psql('postgres','DROP extension citus');
+$node_coordinator->safe_psql('postgres',$initial_schema);
+$node_coordinator->safe_psql('postgres','ALTER TABLE data_100008 REPLICA IDENTITY FULL;');
+
+$node_cdc_client->safe_psql('postgres',$initial_schema);
+
+
+create_cdc_publication_and_slots_for_coordinator($node_coordinator,'data_100008');
+connect_cdc_client_to_coordinator_publication($node_coordinator, $node_cdc_client);
+wait_for_cdc_client_to_catch_up_with_coordinator($node_coordinator);
+
+# Insert data into the data_100008 table in the coordinator node.
+$node_coordinator->safe_psql('postgres',"
+    INSERT INTO data_100008
+    SELECT i, i*10
+    FROM generate_series(-10,10)i;");
+
+
+$result = compare_tables_in_different_nodes($node_coordinator,$node_cdc_client,'postgres',$select_stmt);
+is($result, 1, 'CDC without citus - basic test');
+
+$node_cdc_client->safe_psql('postgres',"drop subscription cdc_subscription");
+done_testing();
diff --git a/src/test/cdc/t/cdctestlib.pm b/src/test/cdc/t/cdctestlib.pm
new file mode 100644
index 000000000..782da1198
--- /dev/null
+++ b/src/test/cdc/t/cdctestlib.pm
@@ -0,0 +1,331 @@
+use strict;
+use warnings;
+
+my $pg_major_version = int($ENV{'pg_major_version'});
+print("working with PG major version : $pg_major_version\n");
+if ($pg_major_version >= 15) {
+    eval "use PostgreSQL::Test::Cluster";
+    eval "use PostgreSQL::Test::Utils";
+} else {
+    eval "use PostgresNode";
+}
+
+
+#use PostgresNode;
+use DBI;
+
+our $NODE_TYPE_COORDINATOR = 1;
+our $NODE_TYPE_WORKER = 2;
+our $NODE_TYPE_CDC_CLIENT = 3;
+
+sub compare_tables_in_different_nodes
+{
+    my $result = 1;
+    my ($node1, $node2, $dbname, $stmt) = @_;
+
+    # Connect to the first database node
+    my $dbh1 = DBI->connect("dbi:Pg:" . $node1->connstr($dbname));
+
+    # Connect to the second database node
+    my $dbh2 = DBI->connect("dbi:Pg:" . 
$node2->connstr($dbname)); + + # Define the SQL query for the first database node + my $sth1 = $dbh1->prepare($stmt); + $sth1->execute(); + + # Define the SQL query for the second database node + my $sth2 = $dbh2->prepare($stmt); + $sth2->execute(); + + # Get the field names for the table + my @field_names = @{$sth2->{NAME}}; + + #$sth1->dump_results(); + #$sth2->dump_results(); + + our @row1, our @row2; + + # Use a cursor to iterate over the first database node's data + while (1) { + + @row1 = $sth1->fetchrow_array(); + @row2 = $sth2->fetchrow_array(); + #print("row1: @row1\n"); + #print("row2: @row2\n"); + + # Use a cursor to iterate over the second database node's data + if (@row1 and @row2) { + #print("row1: @row1\n"); + #print("row2: @row2\n"); + my $field_count_row1 = scalar @row1; + my $field_count_row2 = scalar @row2; + if ($field_count_row1 != $field_count_row2) { + print "Field count mismatch: $field_count_row1 != $field_count_row2 \n"; + print "First row: @row1\n"; + #print "Second row: @row2\n"; + for (my $i = 0; $i < scalar @row2; $i++) { + print("Field $i, field name: $field_names[$i], value: $row2[$i] \n"); + } + $result = 0; + last; + } + # Compare the data in each field in each row of the two nodes + for (my $i = 0; $i < scalar @row1; $i++) { + if ($row1[$i] ne $row2[$i]) { + print "Data mismatch in field '$field_names[$i]'\n"; + print "$row1[$i] != $row2[$i]\n"; + print "First row: @row1\n"; + print "Second row: @row2\n"; + $result = 0; + last; + } + } + } elsif (@row1 and !@row2) { + print "First node has more rows than the second node\n"; + $result = 0; + last; + } elsif (!@row1 and @row2) { + print "Second node has more rows than the first node\n"; + $result = 0; + last; + } else { + last; + } + } + + $sth1->finish(); + $sth2->finish(); + $dbh1->disconnect(); + $dbh2->disconnect(); + return $result; +} + +sub create_node { + my ($name,$node_type,$host, $port, $config) = @_; + if (!defined($config)) { + $config = "" + } + + our $node; + + if ($pg_major_version >= 15) { + $PostgreSQL::Test::Cluster::use_unix_sockets = 0; + $PostgreSQL::Test::Cluster::use_tcp = 1; + $PostgreSQL::Test::Cluster::test_pghost = 'localhost'; + my %params = ( "port" => $port, "host" => "localhost"); + $node = PostgreSQL::Test::Cluster->new($name, %params); + } else { + $PostgresNode::use_tcp = 1; + $PostgresNode::test_pghost = '127.0.0.1'; + my %params = ( "port" => $port, "host" => "localhost"); + $node = get_new_node($name, %params); + } + print("node's port:" . $node->port . "\n"); + + $port += 1; + + my $citus_config_options = " +max_connections = 100 +max_wal_senders = 100 +max_replication_slots = 100 +citus.enable_change_data_capture = on +log_statement = 'all' +citus.override_table_visibility = off"; + + if ($config ne "") { + $citus_config_options = $citus_config_options . 
$config + } + + my $client_config_options = " +max_connections = 100 +max_wal_senders = 100 +max_replication_slots = 100 +"; + $node->init(allows_streaming => 'logical'); + if ($node_type == $NODE_TYPE_COORDINATOR || $node_type == $NODE_TYPE_WORKER) { + $node->append_conf("postgresql.conf",$citus_config_options); + } else { + $node->append_conf("postgresql.conf",$citus_config_options); + } + + $node->start(); + + if ($node_type == $NODE_TYPE_COORDINATOR || $node_type == $NODE_TYPE_WORKER) { + $node->safe_psql('postgres', "CREATE EXTENSION citus;"); + my $value = $node->safe_psql('postgres', "SHOW citus.enable_change_data_capture;"); + print("citus.enable_change_data_capture value is $value\n") + } + + return $node; +} + +# Create a Citus cluster with the given number of workers +sub create_citus_cluster { + my ($no_workers,$host,$port,$citus_config) = @_; + my @workers = (); + my $node_coordinator; + print("citus_config :", $citus_config); + if ($citus_config ne "") { + $node_coordinator = create_node('coordinator', $NODE_TYPE_COORDINATOR,$host, $port, $citus_config); + } else { + $node_coordinator = create_node('coordinator', $NODE_TYPE_COORDINATOR,$host, $port); + } + my $coord_host = $node_coordinator->host(); + my $coord_port = $node_coordinator->port(); + $node_coordinator->safe_psql('postgres',"SELECT pg_catalog.citus_set_coordinator_host('$coord_host', $coord_port);"); + for (my $i = 0; $i < $no_workers; $i++) { + $port = $port + 1; + my $node_worker; + if ($citus_config ne "") { + $node_worker = create_node("worker$i", $NODE_TYPE_WORKER,"localhost", $port, $citus_config); + } else { + $node_worker = create_node("worker$i", $NODE_TYPE_WORKER,"localhost", $port); + } + my $node_worker_host = $node_worker->host(); + my $node_worker_port = $node_worker->port(); + $node_coordinator->safe_psql('postgres',"SELECT pg_catalog.citus_add_node('$node_worker_host', $node_worker_port);"); + push @workers, $node_worker; + } + return $node_coordinator, @workers; +} + +sub create_cdc_publication_and_replication_slots_for_citus_cluster { + my $node_coordinator = $_[0]; + my $workersref = $_[1]; + my $table_names = $_[2]; + + create_cdc_publication_and_slots_for_coordinator($node_coordinator, $table_names); + create_cdc_slots_for_workers($workersref); +} + +sub create_cdc_publication_and_slots_for_coordinator { + my $node_coordinator = $_[0]; + my $table_names = $_[1]; + print("node node_coordinator connstr: \n" . 
$node_coordinator->connstr()); + my $pub = $node_coordinator->safe_psql('postgres',"SELECT * FROM pg_publication WHERE pubname = 'cdc_publication';"); + if ($pub ne "") { + $node_coordinator->safe_psql('postgres',"DROP PUBLICATION IF EXISTS cdc_publication;"); + } + $node_coordinator->safe_psql('postgres',"CREATE PUBLICATION cdc_publication FOR TABLE $table_names;"); + $node_coordinator->safe_psql('postgres',"SELECT pg_catalog.pg_create_logical_replication_slot('cdc_replication_slot','pgoutput',false)"); +} + +sub create_cdc_slots_for_workers { + my $workersref = $_[0]; + for (@$workersref) { + my $slot = $_->safe_psql('postgres',"select * from pg_replication_slots where slot_name = 'cdc_replication_slot';"); + if ($slot ne "") { + $_->safe_psql('postgres',"SELECT pg_catalog.pg_drop_replication_slot('cdc_replication_slot');"); + } + $_->safe_psql('postgres',"SELECT pg_catalog.pg_create_logical_replication_slot('cdc_replication_slot','pgoutput',false)"); + } +} + + +sub connect_cdc_client_to_citus_cluster_publications { + my $node_coordinator = $_[0]; + my $workersref = $_[1]; + my $node_cdc_client = $_[2]; + my $num_args = scalar(@_); + + + if ($num_args > 3) { + my $copy_arg = $_[3]; + connect_cdc_client_to_coordinator_publication($node_coordinator,$node_cdc_client, $copy_arg); + } else { + connect_cdc_client_to_coordinator_publication($node_coordinator,$node_cdc_client); + } + connect_cdc_client_to_workers_publication($workersref, $node_cdc_client); +} + +sub connect_cdc_client_to_coordinator_publication { + my $node_coordinator = $_[0]; + my $node_cdc_client = $_[1]; + my $num_args = scalar(@_); + my $copy_data = ""; + if ($num_args > 2) { + my $copy_arg = $_[2]; + $copy_data = 'copy_data='. $copy_arg; + } else { + $copy_data = 'copy_data=false'; + } + + my $conn_str = $node_coordinator->connstr() . " dbname=postgres"; + my $subscription = 'cdc_subscription'; + print "creating subscription $subscription for coordinator: $conn_str\n"; + $node_cdc_client->safe_psql('postgres'," + CREATE SUBSCRIPTION $subscription + CONNECTION '$conn_str' + PUBLICATION cdc_publication + WITH ( + create_slot=false, + enabled=true, + slot_name=cdc_replication_slot," + . $copy_data. ");" + ); +} + +sub connect_cdc_client_to_workers_publication { + my $workersref = $_[0]; + my $node_cdc_client = $_[1]; + my $i = 1; + for (@$workersref) { + my $conn_str = $_->connstr() . " dbname=postgres"; + my $subscription = 'cdc_subscription_' . 
$i; + print "creating subscription $subscription for node$i: $conn_str\n"; + my $subscription_stmt = "CREATE SUBSCRIPTION $subscription + CONNECTION '$conn_str' + PUBLICATION cdc_publication + WITH ( + create_slot=false, + enabled=true, + slot_name=cdc_replication_slot, + copy_data=false); + "; + + $node_cdc_client->safe_psql('postgres',$subscription_stmt); + $i++; + } +} + +sub wait_for_cdc_client_to_catch_up_with_citus_cluster { + my $node_coordinator = $_[0]; + my ($workersref) = $_[1]; + + my $subscription = 'cdc_subscription'; + print "coordinator: waiting for cdc client subscription $subscription to catch up\n"; + $node_coordinator->wait_for_catchup($subscription); + wait_for_cdc_client_to_catch_up_with_workers($workersref); +} + +sub wait_for_cdc_client_to_catch_up_with_coordinator { + my $node_coordinator = $_[0]; + my $subscription = 'cdc_subscription'; + print "coordinator: waiting for cdc client subscription $subscription to catch up\n"; + $node_coordinator->wait_for_catchup($subscription); +} + +sub wait_for_cdc_client_to_catch_up_with_workers { + my ($workersref) = $_[0]; + my $i = 1; + for (@$workersref) { + my $subscription = 'cdc_subscription_' . $i; + print "node$i: waiting for cdc client subscription $subscription to catch up\n"; + $_->wait_for_catchup($subscription); + $i++; + } +} + +sub drop_cdc_client_subscriptions { + my $node = $_[0]; + my ($workersref) = $_[1]; + + $node->safe_psql('postgres',"drop subscription cdc_subscription"); + my $i = 1; + for (@$workersref) { + my $subscription = 'cdc_subscription_' . $i; + $node->safe_psql('postgres',"drop subscription " . $subscription); + $i++; + } +} + diff --git a/src/test/columnar_freezing/postgresql.conf b/src/test/columnar_freezing/postgresql.conf deleted file mode 100644 index 39521cc33..000000000 --- a/src/test/columnar_freezing/postgresql.conf +++ /dev/null @@ -1,7 +0,0 @@ -shared_preload_libraries=citus -shared_preload_libraries='citus' -vacuum_freeze_min_age = 50000 -vacuum_freeze_table_age = 50000 -synchronous_commit = off -fsync = off - diff --git a/src/test/columnar_freezing/t/001_columnar_freezing.pl b/src/test/columnar_freezing/t/001_columnar_freezing.pl deleted file mode 100644 index 01e8346cf..000000000 --- a/src/test/columnar_freezing/t/001_columnar_freezing.pl +++ /dev/null @@ -1,52 +0,0 @@ -# Minimal test testing streaming replication -use strict; -use warnings; -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; -use Test::More tests => 2; - -# Initialize single node -my $node_one = PostgreSQL::Test::Cluster->new('node_one'); -$node_one->init(); -$node_one->start; - -# initialize the citus extension -$node_one->safe_psql('postgres', "CREATE EXTENSION citus;"); - -# create columnar table and insert simple data to verify the data survives a crash -$node_one->safe_psql('postgres', " -CREATE TABLE test_row(i int); -INSERT INTO test_row VALUES (1); -CREATE TABLE test_columnar_freeze(i int) USING columnar WITH(autovacuum_enabled=false); -INSERT INTO test_columnar_freeze VALUES (1); -"); - -my $ten_thousand_updates = ""; - -foreach (1..10000) { - $ten_thousand_updates .= "UPDATE test_row SET i = i + 1;\n"; -} - -# 70K updates -foreach (1..7) { - $node_one->safe_psql('postgres', $ten_thousand_updates); -} - -my $result = $node_one->safe_psql('postgres', " -select age(relfrozenxid) < 70000 as was_frozen - from pg_class where relname='test_columnar_freeze'; -"); -print "node one count: $result\n"; -is($result, qq(f), 'columnar table was not frozen'); - -$node_one->safe_psql('postgres', 'VACUUM FREEZE 
test_columnar_freeze;'); - -$result = $node_one->safe_psql('postgres', " -select age(relfrozenxid) < 70000 as was_frozen - from pg_class where relname='test_columnar_freeze'; -"); -print "node one count: $result\n"; -is($result, qq(t), 'columnar table was frozen'); - -$node_one->stop('fast'); - diff --git a/src/test/columnar_freezing/t_pg13_pg14/001_columnar_freezing_pg13_pg14.pl b/src/test/columnar_freezing/t_pg13_pg14/001_columnar_freezing_pg13_pg14.pl deleted file mode 100644 index 1985da2a5..000000000 --- a/src/test/columnar_freezing/t_pg13_pg14/001_columnar_freezing_pg13_pg14.pl +++ /dev/null @@ -1,52 +0,0 @@ -# Minimal test testing streaming replication -use strict; -use warnings; -use PostgresNode; -use TestLib; -use Test::More tests => 2; - -# Initialize single node -my $node_one = get_new_node('node_one'); -$node_one->init(); -$node_one->start; - -# initialize the citus extension -$node_one->safe_psql('postgres', "CREATE EXTENSION citus;"); - -# create columnar table and insert simple data to verify the data survives a crash -$node_one->safe_psql('postgres', " -CREATE TABLE test_row(i int); -INSERT INTO test_row VALUES (1); -CREATE TABLE test_columnar_freeze(i int) USING columnar WITH(autovacuum_enabled=false); -INSERT INTO test_columnar_freeze VALUES (1); -"); - -my $ten_thousand_updates = ""; - -foreach (1..10000) { - $ten_thousand_updates .= "UPDATE test_row SET i = i + 1;\n"; -} - -# 70K updates -foreach (1..7) { - $node_one->safe_psql('postgres', $ten_thousand_updates); -} - -my $result = $node_one->safe_psql('postgres', " -select age(relfrozenxid) < 70000 as was_frozen - from pg_class where relname='test_columnar_freeze'; -"); -print "node one count: $result\n"; -is($result, qq(f), 'columnar table was not frozen'); - -$node_one->safe_psql('postgres', 'VACUUM FREEZE test_columnar_freeze;'); - -$result = $node_one->safe_psql('postgres', " -select age(relfrozenxid) < 70000 as was_frozen - from pg_class where relname='test_columnar_freeze'; -"); -print "node one count: $result\n"; -is($result, qq(t), 'columnar table was frozen'); - -$node_one->stop('fast'); - diff --git a/src/test/recovery/.gitignore b/src/test/recovery/.gitignore deleted file mode 100644 index 871e943d5..000000000 --- a/src/test/recovery/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by test suite -/tmp_check/ diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile deleted file mode 100644 index 03e18fa0c..000000000 --- a/src/test/recovery/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -#------------------------------------------------------------------------- -# -# Makefile for src/test/recovery -# -# Losely based on the makefile found in postgres' src/test/recovery. -# We need to define our own invocation of prove to pass the correct path -# to pg_regress and include citus in the shared preload libraries. -# -#------------------------------------------------------------------------- - -subdir = src/test/recovery -top_builddir = ../../.. 
-include $(top_builddir)/Makefile.global - -# In PG15, Perl test modules have been moved to a new namespace -# new() and get_new_node() methods have been unified to 1 method: new() -# Relevant PG commits 201a76183e2056c2217129e12d68c25ec9c559c8 -# b3b4d8e68ae83f432f43f035c7eb481ef93e1583 -pg_version = $(shell $(PG_CONFIG) --version 2>/dev/null) -pg_whole_version = $(shell echo "$(pg_version)"| sed -e 's/^PostgreSQL \([0-9]*\)\(\.[0-9]*\)\{0,1\}\(.*\)/\1\2/') -pg_major_version = $(shell echo "$(pg_whole_version)"| sed -e 's/^\([0-9]\{2\}\)\(.*\)/\1/') - -# for now, we only have a single test file -# due to the above explanation, we ended up separating the test paths for -# different versions. If you need to add new test files, be careful to add both versions -ifeq ($(pg_major_version),13) - test_path = t_pg13_pg14/*.pl -else ifeq ($(pg_major_version),14) - test_path = t_pg13_pg14/*.pl -else - test_path = t/*.pl -endif - -# copied from pgxs/Makefile.global to use postgres' abs build dir for pg_regress -ifeq ($(enable_tap_tests),yes) - -define citus_prove_installcheck -rm -rf '$(CURDIR)'/tmp_check -$(MKDIR_P) '$(CURDIR)'/tmp_check -cd $(srcdir) && \ -TESTDIR='$(CURDIR)' \ -PATH="$(bindir):$$PATH" \ -PGPORT='6$(DEF_PGPORT)' \ -top_builddir='$(CURDIR)/$(top_builddir)' \ -PG_REGRESS='$(pgxsdir)/src/test/regress/pg_regress' \ -TEMP_CONFIG='$(CURDIR)'/postgresql.conf \ -$(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),$(test_path)) -endef - -else -citus_prove_installcheck = @echo "TAP tests not enabled when postgres was compiled" -endif - -installcheck: - $(citus_prove_installcheck) - -clean distclean maintainer-clean: - rm -rf tmp_check diff --git a/src/test/recovery/postgresql.conf b/src/test/recovery/postgresql.conf deleted file mode 100644 index f9d205b38..000000000 --- a/src/test/recovery/postgresql.conf +++ /dev/null @@ -1 +0,0 @@ -shared_preload_libraries=citus diff --git a/src/test/recovery/t/001_columnar_crash_recovery.pl b/src/test/recovery/t/001_columnar_crash_recovery.pl deleted file mode 100644 index 7dee21dd1..000000000 --- a/src/test/recovery/t/001_columnar_crash_recovery.pl +++ /dev/null @@ -1,98 +0,0 @@ -# Minimal test testing streaming replication -use strict; -use warnings; -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; -use Test::More tests => 6; - -# Initialize single node -my $node_one = PostgreSQL::Test::Cluster->new('node_one'); -$node_one->init(); -$node_one->start; - -# initialize the citus extension -$node_one->safe_psql('postgres', "CREATE EXTENSION citus;"); - -# create columnar table and insert simple data to verify the data survives a crash -$node_one->safe_psql('postgres', " -BEGIN; -CREATE TABLE t1 (a int, b text) USING columnar; -INSERT INTO t1 SELECT a, 'hello world ' || a FROM generate_series(1,1002) AS a; -COMMIT; -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -my $result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(1002), 'columnar recovered data from before crash'); - - -# truncate the table to verify the truncation survives a crash -$node_one->safe_psql('postgres', " -TRUNCATE t1; -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(0), 'columnar recovered truncation'); - -# test crashing while having an open transaction -$node_one->safe_psql('postgres', " -BEGIN; 
-INSERT INTO t1 SELECT a, 'hello world ' || a FROM generate_series(1,1003) AS a; -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(0), 'columnar crash during uncommitted transaction'); - -# test crashing while having a prepared transaction -$node_one->safe_psql('postgres', " -BEGIN; -INSERT INTO t1 SELECT a, 'hello world ' || a FROM generate_series(1,1004) AS a; -PREPARE TRANSACTION 'prepared_xact_crash'; -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(0), 'columnar crash during prepared transaction (before commit)'); - -$node_one->safe_psql('postgres', " -COMMIT PREPARED 'prepared_xact_crash'; -"); - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(1004), 'columnar crash during prepared transaction (after commit)'); - -# test crash recovery with copied data -$node_one->safe_psql('postgres', " -\\copy t1 FROM stdin delimiter ',' -1,a -2,b -3,c -\\. -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(1007), 'columnar crash after copy command'); diff --git a/src/test/recovery/t_pg13_pg14/001_columnar_crash_recovery_pg13_pg14.pl b/src/test/recovery/t_pg13_pg14/001_columnar_crash_recovery_pg13_pg14.pl deleted file mode 100644 index 9ea87835f..000000000 --- a/src/test/recovery/t_pg13_pg14/001_columnar_crash_recovery_pg13_pg14.pl +++ /dev/null @@ -1,98 +0,0 @@ -# Minimal test testing streaming replication -use strict; -use warnings; -use PostgresNode; -use TestLib; -use Test::More tests => 6; - -# Initialize single node -my $node_one = get_new_node('node_one'); -$node_one->init(); -$node_one->start; - -# initialize the citus extension -$node_one->safe_psql('postgres', "CREATE EXTENSION citus;"); - -# create columnar table and insert simple data to verify the data survives a crash -$node_one->safe_psql('postgres', " -BEGIN; -CREATE TABLE t1 (a int, b text) USING columnar; -INSERT INTO t1 SELECT a, 'hello world ' || a FROM generate_series(1,1002) AS a; -COMMIT; -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -my $result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(1002), 'columnar recovered data from before crash'); - - -# truncate the table to verify the truncation survives a crash -$node_one->safe_psql('postgres', " -TRUNCATE t1; -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(0), 'columnar recovered truncation'); - -# test crashing while having an open transaction -$node_one->safe_psql('postgres', " -BEGIN; -INSERT INTO t1 SELECT a, 'hello world ' || a FROM generate_series(1,1003) AS a; -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(0), 'columnar crash during uncommitted transaction'); - -# test crashing while having a prepared transaction 
-$node_one->safe_psql('postgres', " -BEGIN; -INSERT INTO t1 SELECT a, 'hello world ' || a FROM generate_series(1,1004) AS a; -PREPARE TRANSACTION 'prepared_xact_crash'; -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(0), 'columnar crash during prepared transaction (before commit)'); - -$node_one->safe_psql('postgres', " -COMMIT PREPARED 'prepared_xact_crash'; -"); - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(1004), 'columnar crash during prepared transaction (after commit)'); - -# test crash recovery with copied data -$node_one->safe_psql('postgres', " -\\copy t1 FROM stdin delimiter ',' -1,a -2,b -3,c -\\. -"); - -# simulate crash -$node_one->stop('immediate'); -$node_one->start; - -$result = $node_one->safe_psql('postgres', "SELECT count(*) FROM t1;"); -print "node one count: $result\n"; -is($result, qq(1007), 'columnar crash after copy command'); diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index 368f8f8c5..b801f33ff 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -44,7 +44,7 @@ vanilla_diffs_file = $(citus_abs_srcdir)/pg_vanilla_outputs/$(MAJORVERSION)/regr # intermediate, for muscle memory backward compatibility. check: check-full check-enterprise-full # check-full triggers all tests that ought to be run routinely -check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local +check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest # check-enterprise-full triggers all enterprise specific tests check-enterprise-full: check-enterprise check-enterprise-isolation check-enterprise-failure check-enterprise-isolation-logicalrep-1 check-enterprise-isolation-logicalrep-2 check-enterprise-isolation-logicalrep-3 @@ -117,29 +117,31 @@ check-minimal-mx: all -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS) check-custom-schedule: all - $(pg_regress_multi_check) --load-extension=citus \ + $(pg_regress_multi_check) --load-extension=citus --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-failure-custom-schedule: all - $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ + $(pg_regress_multi_check) --load-extension=citus --mitmproxy --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-isolation-custom-schedule: all $(isolation_test_files) - $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + $(pg_regress_multi_check) --load-extension=citus --isolationtester --worker-count=$(WORKERCOUNT) \ -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-custom-schedule-vg: all $(pg_regress_multi_check) 
--load-extension=citus \ - --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ + --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --worker-count=$(WORKERCOUNT) \ + --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-failure-custom-schedule-vg: all $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ - --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ + --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --worker-count=$(WORKERCOUNT) \ + --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-isolation-custom-schedule-vg: all $(isolation_test_files) - $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + $(pg_regress_multi_check) --load-extension=citus --isolationtester --worker-count=$(WORKERCOUNT) \ --valgrind --pg_ctl-timeout=360 --connection-timeout=500000 --valgrind-path=valgrind --valgrind-log-file=$(CITUS_VALGRIND_LOG_FILE) \ -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) @@ -221,7 +223,7 @@ check-follower-cluster: all -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_follower_schedule $(EXTRA_TESTS) check-operations: all - $(pg_regress_multi_check) --load-extension=citus \ + $(pg_regress_multi_check) --load-extension=citus --worker-count=6 \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/operations_schedule $(EXTRA_TESTS) check-columnar: all @@ -257,6 +259,9 @@ check-arbitrary-configs: all check-arbitrary-base: all ${arbitrary_config_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir) --parallel=$(parallel) --configs=$(CONFIGS) --seed=$(seed) --base +check-pytest: + pytest -n auto + check-citus-upgrade: all $(citus_upgrade_check) \ --bindir=$(bindir) \ diff --git a/src/test/regress/Pipfile b/src/test/regress/Pipfile index 240dee3df..663785a3d 100644 --- a/src/test/regress/Pipfile +++ b/src/test/regress/Pipfile @@ -4,10 +4,17 @@ url = "https://pypi.python.org/simple" verify_ssl = true [packages] -mitmproxy = {editable = true, ref = "fix/tcp-flow-kill", git = "https://github.com/thanodnl/mitmproxy.git"} +mitmproxy = {editable = true, ref = "main", git = "https://github.com/citusdata/mitmproxy.git"} construct = "==2.9.45" docopt = "==0.6.2" -cryptography = "==3.4.8" +cryptography = ">=39.0.1" +pytest = "*" +psycopg = "*" +filelock = "*" +pytest-asyncio = "*" +pytest-timeout = "*" +pytest-xdist = "*" +pytest-repeat = "*" [dev-packages] black = "*" diff --git a/src/test/regress/Pipfile.lock b/src/test/regress/Pipfile.lock index 954c3610e..4a86e09a8 100644 --- a/src/test/regress/Pipfile.lock +++ b/src/test/regress/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "635b4c111e3bca87373fcdf308febf0a816dde15b14f6bf078f2b456630e5ef1" + "sha256": "eb9ca3a7b05e76c7ac60179a1755f89600dfb215e02bf08c258d548df1d96025" }, "pipfile-spec": 6, "requires": { @@ -24,6 +24,14 @@ "markers": "python_version >= '3.6'", "version": "==3.4.1" }, + "attrs": { + "hashes": [ + "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", + "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" + ], + "markers": "python_version >= '3.6'", + "version": "==22.2.0" 
+ }, "blinker": { "hashes": [ "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6" @@ -211,28 +219,32 @@ }, "cryptography": { "hashes": [ - "sha256:0a7dcbcd3f1913f664aca35d47c1331fce738d44ec34b7be8b9d332151b0b01e", - "sha256:1eb7bb0df6f6f583dd8e054689def236255161ebbcf62b226454ab9ec663746b", - "sha256:21ca464b3a4b8d8e86ba0ee5045e103a1fcfac3b39319727bc0fc58c09c6aff7", - "sha256:34dae04a0dce5730d8eb7894eab617d8a70d0c97da76b905de9efb7128ad7085", - "sha256:3520667fda779eb788ea00080124875be18f2d8f0848ec00733c0ec3bb8219fc", - "sha256:3c4129fc3fdc0fa8e40861b5ac0c673315b3c902bbdc05fc176764815b43dd1d", - "sha256:3fa3a7ccf96e826affdf1a0a9432be74dc73423125c8f96a909e3835a5ef194a", - "sha256:5b0fbfae7ff7febdb74b574055c7466da334a5371f253732d7e2e7525d570498", - "sha256:695104a9223a7239d155d7627ad912953b540929ef97ae0c34c7b8bf30857e89", - "sha256:8695456444f277af73a4877db9fc979849cd3ee74c198d04fc0776ebc3db52b9", - "sha256:94cc5ed4ceaefcbe5bf38c8fba6a21fc1d365bb8fb826ea1688e3370b2e24a1c", - "sha256:94fff993ee9bc1b2440d3b7243d488c6a3d9724cc2b09cdb297f6a886d040ef7", - "sha256:9965c46c674ba8cc572bc09a03f4c649292ee73e1b683adb1ce81e82e9a6a0fb", - "sha256:a00cf305f07b26c351d8d4e1af84ad7501eca8a342dedf24a7acb0e7b7406e14", - "sha256:a305600e7a6b7b855cd798e00278161b681ad6e9b7eca94c721d5f588ab212af", - "sha256:cd65b60cfe004790c795cc35f272e41a3df4631e2fb6b35aa7ac6ef2859d554e", - "sha256:d2a6e5ef66503da51d2110edf6c403dc6b494cc0082f85db12f54e9c5d4c3ec5", - "sha256:d9ec0e67a14f9d1d48dd87a2531009a9b251c02ea42851c060b25c782516ff06", - "sha256:f44d141b8c4ea5eb4dbc9b3ad992d45580c1d22bf5e24363f2fbf50c2d7ae8a7" + "sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1", + "sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7", + "sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06", + "sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84", + "sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915", + "sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074", + "sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5", + "sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3", + "sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9", + "sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3", + "sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011", + "sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536", + "sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a", + "sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f", + "sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480", + "sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac", + "sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0", + "sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108", + "sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828", + "sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354", + "sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612", + "sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3", + "sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97" ], "index": "pypi", - "version": "==3.4.8" + "version": "==39.0.2" }, "docopt": { "hashes": [ @@ -241,6 +253,30 @@ "index": "pypi", "version": "==0.6.2" }, + 
"exceptiongroup": { + "hashes": [ + "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e", + "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785" + ], + "markers": "python_version < '3.11'", + "version": "==1.1.1" + }, + "execnet": { + "hashes": [ + "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5", + "sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.9.0" + }, + "filelock": { + "hashes": [ + "sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce", + "sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182" + ], + "index": "pypi", + "version": "==3.10.0" + }, "flask": { "hashes": [ "sha256:59da8a3170004800a2837844bfa84d49b022550616070f7cb1a659682b2e7c9f", @@ -281,6 +317,14 @@ "markers": "python_full_version >= '3.6.1'", "version": "==6.0.1" }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, "itsdangerous": { "hashes": [ "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44", @@ -371,65 +415,84 @@ }, "mitmproxy": { "editable": true, - "git": "https://github.com/thanodnl/mitmproxy.git", - "ref": "62798926288526d27221bdb618f526862a878e33" + "git": "https://github.com/citusdata/mitmproxy.git", + "ref": "2fd18ef051b987925a36337ab1d61aa674353b44" }, "msgpack": { "hashes": [ - "sha256:002b5c72b6cd9b4bafd790f364b8480e859b4712e91f43014fe01e4f957b8467", - "sha256:0a68d3ac0104e2d3510de90a1091720157c319ceeb90d74f7b5295a6bee51bae", - "sha256:0df96d6eaf45ceca04b3f3b4b111b86b33785683d682c655063ef8057d61fd92", - "sha256:0dfe3947db5fb9ce52aaea6ca28112a170db9eae75adf9339a1aec434dc954ef", - "sha256:0e3590f9fb9f7fbc36df366267870e77269c03172d086fa76bb4eba8b2b46624", - "sha256:11184bc7e56fd74c00ead4f9cc9a3091d62ecb96e97653add7a879a14b003227", - "sha256:112b0f93202d7c0fef0b7810d465fde23c746a2d482e1e2de2aafd2ce1492c88", - "sha256:1276e8f34e139aeff1c77a3cefb295598b504ac5314d32c8c3d54d24fadb94c9", - "sha256:1576bd97527a93c44fa856770197dec00d223b0b9f36ef03f65bac60197cedf8", - "sha256:1e91d641d2bfe91ba4c52039adc5bccf27c335356055825c7f88742c8bb900dd", - "sha256:26b8feaca40a90cbe031b03d82b2898bf560027160d3eae1423f4a67654ec5d6", - "sha256:2999623886c5c02deefe156e8f869c3b0aaeba14bfc50aa2486a0415178fce55", - "sha256:2a2df1b55a78eb5f5b7d2a4bb221cd8363913830145fad05374a80bf0877cb1e", - "sha256:2bb8cdf50dd623392fa75525cce44a65a12a00c98e1e37bf0fb08ddce2ff60d2", - "sha256:2cc5ca2712ac0003bcb625c96368fd08a0f86bbc1a5578802512d87bc592fe44", - "sha256:35bc0faa494b0f1d851fd29129b2575b2e26d41d177caacd4206d81502d4c6a6", - "sha256:3c11a48cf5e59026ad7cb0dc29e29a01b5a66a3e333dc11c04f7e991fc5510a9", - "sha256:449e57cc1ff18d3b444eb554e44613cffcccb32805d16726a5494038c3b93dab", - "sha256:462497af5fd4e0edbb1559c352ad84f6c577ffbbb708566a0abaaa84acd9f3ae", - "sha256:4733359808c56d5d7756628736061c432ded018e7a1dff2d35a02439043321aa", - "sha256:48f5d88c99f64c456413d74a975bd605a9b0526293218a3b77220a2c15458ba9", - "sha256:49565b0e3d7896d9ea71d9095df15b7f75a035c49be733051c34762ca95bbf7e", - "sha256:4ab251d229d10498e9a2f3b1e68ef64cb393394ec477e3370c457f9430ce9250", - "sha256:4d5834a2a48965a349da1c5a79760d94a1a0172fbb5ab6b5b33cbf8447e109ce", - 
"sha256:4dea20515f660aa6b7e964433b1808d098dcfcabbebeaaad240d11f909298075", - "sha256:545e3cf0cf74f3e48b470f68ed19551ae6f9722814ea969305794645da091236", - "sha256:63e29d6e8c9ca22b21846234913c3466b7e4ee6e422f205a2988083de3b08cae", - "sha256:6916c78f33602ecf0509cc40379271ba0f9ab572b066bd4bdafd7434dee4bc6e", - "sha256:6a4192b1ab40f8dca3f2877b70e63799d95c62c068c84dc028b40a6cb03ccd0f", - "sha256:6c9566f2c39ccced0a38d37c26cc3570983b97833c365a6044edef3574a00c08", - "sha256:76ee788122de3a68a02ed6f3a16bbcd97bc7c2e39bd4d94be2f1821e7c4a64e6", - "sha256:7760f85956c415578c17edb39eed99f9181a48375b0d4a94076d84148cf67b2d", - "sha256:77ccd2af37f3db0ea59fb280fa2165bf1b096510ba9fe0cc2bf8fa92a22fdb43", - "sha256:81fc7ba725464651190b196f3cd848e8553d4d510114a954681fd0b9c479d7e1", - "sha256:85f279d88d8e833ec015650fd15ae5eddce0791e1e8a59165318f371158efec6", - "sha256:9667bdfdf523c40d2511f0e98a6c9d3603be6b371ae9a238b7ef2dc4e7a427b0", - "sha256:a75dfb03f8b06f4ab093dafe3ddcc2d633259e6c3f74bb1b01996f5d8aa5868c", - "sha256:ac5bd7901487c4a1dd51a8c58f2632b15d838d07ceedaa5e4c080f7190925bff", - "sha256:aca0f1644d6b5a73eb3e74d4d64d5d8c6c3d577e753a04c9e9c87d07692c58db", - "sha256:b17be2478b622939e39b816e0aa8242611cc8d3583d1cd8ec31b249f04623243", - "sha256:c1683841cd4fa45ac427c18854c3ec3cd9b681694caf5bff04edb9387602d661", - "sha256:c23080fdeec4716aede32b4e0ef7e213c7b1093eede9ee010949f2a418ced6ba", - "sha256:d5b5b962221fa2c5d3a7f8133f9abffc114fe218eb4365e40f17732ade576c8e", - "sha256:d603de2b8d2ea3f3bcb2efe286849aa7a81531abc52d8454da12f46235092bcb", - "sha256:e83f80a7fec1a62cf4e6c9a660e39c7f878f603737a0cdac8c13131d11d97f52", - "sha256:eb514ad14edf07a1dbe63761fd30f89ae79b42625731e1ccf5e1f1092950eaa6", - "sha256:eba96145051ccec0ec86611fe9cf693ce55f2a3ce89c06ed307de0e085730ec1", - "sha256:ed6f7b854a823ea44cf94919ba3f727e230da29feb4a99711433f25800cf747f", - "sha256:f0029245c51fd9473dc1aede1160b0a29f4a912e6b1dd353fa6d317085b219da", - "sha256:f5d869c18f030202eb412f08b28d2afeea553d6613aee89e200d7aca7ef01f5f", - "sha256:fb62ea4b62bfcb0b380d5680f9a4b3f9a2d166d9394e9bbd9666c0ee09a3645c", - "sha256:fcb8a47f43acc113e24e910399376f7277cf8508b27e5b88499f053de6b115a8" + "sha256:06f5174b5f8ed0ed919da0e62cbd4ffde676a374aba4020034da05fab67b9164", + "sha256:0c05a4a96585525916b109bb85f8cb6511db1c6f5b9d9cbcbc940dc6b4be944b", + "sha256:137850656634abddfb88236008339fdaba3178f4751b28f270d2ebe77a563b6c", + "sha256:17358523b85973e5f242ad74aa4712b7ee560715562554aa2134d96e7aa4cbbf", + "sha256:18334484eafc2b1aa47a6d42427da7fa8f2ab3d60b674120bce7a895a0a85bdd", + "sha256:1835c84d65f46900920b3708f5ba829fb19b1096c1800ad60bae8418652a951d", + "sha256:1967f6129fc50a43bfe0951c35acbb729be89a55d849fab7686004da85103f1c", + "sha256:1ab2f3331cb1b54165976a9d976cb251a83183631c88076613c6c780f0d6e45a", + "sha256:1c0f7c47f0087ffda62961d425e4407961a7ffd2aa004c81b9c07d9269512f6e", + "sha256:20a97bf595a232c3ee6d57ddaadd5453d174a52594bf9c21d10407e2a2d9b3bd", + "sha256:20c784e66b613c7f16f632e7b5e8a1651aa5702463d61394671ba07b2fc9e025", + "sha256:266fa4202c0eb94d26822d9bfd7af25d1e2c088927fe8de9033d929dd5ba24c5", + "sha256:28592e20bbb1620848256ebc105fc420436af59515793ed27d5c77a217477705", + "sha256:288e32b47e67f7b171f86b030e527e302c91bd3f40fd9033483f2cacc37f327a", + "sha256:3055b0455e45810820db1f29d900bf39466df96ddca11dfa6d074fa47054376d", + "sha256:332360ff25469c346a1c5e47cbe2a725517919892eda5cfaffe6046656f0b7bb", + "sha256:362d9655cd369b08fda06b6657a303eb7172d5279997abe094512e919cf74b11", + "sha256:366c9a7b9057e1547f4ad51d8facad8b406bab69c7d72c0eb6f529cf76d4b85f", + 
"sha256:36961b0568c36027c76e2ae3ca1132e35123dcec0706c4b7992683cc26c1320c", + "sha256:379026812e49258016dd84ad79ac8446922234d498058ae1d415f04b522d5b2d", + "sha256:382b2c77589331f2cb80b67cc058c00f225e19827dbc818d700f61513ab47bea", + "sha256:476a8fe8fae289fdf273d6d2a6cb6e35b5a58541693e8f9f019bfe990a51e4ba", + "sha256:48296af57cdb1d885843afd73c4656be5c76c0c6328db3440c9601a98f303d87", + "sha256:4867aa2df9e2a5fa5f76d7d5565d25ec76e84c106b55509e78c1ede0f152659a", + "sha256:4c075728a1095efd0634a7dccb06204919a2f67d1893b6aa8e00497258bf926c", + "sha256:4f837b93669ce4336e24d08286c38761132bc7ab29782727f8557e1eb21b2080", + "sha256:4f8d8b3bf1ff2672567d6b5c725a1b347fe838b912772aa8ae2bf70338d5a198", + "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9", + "sha256:5494ea30d517a3576749cad32fa27f7585c65f5f38309c88c6d137877fa28a5a", + "sha256:55b56a24893105dc52c1253649b60f475f36b3aa0fc66115bffafb624d7cb30b", + "sha256:56a62ec00b636583e5cb6ad313bbed36bb7ead5fa3a3e38938503142c72cba4f", + "sha256:57e1f3528bd95cc44684beda696f74d3aaa8a5e58c816214b9046512240ef437", + "sha256:586d0d636f9a628ddc6a17bfd45aa5b5efaf1606d2b60fa5d87b8986326e933f", + "sha256:5cb47c21a8a65b165ce29f2bec852790cbc04936f502966768e4aae9fa763cb7", + "sha256:6c4c68d87497f66f96d50142a2b73b97972130d93677ce930718f68828b382e2", + "sha256:821c7e677cc6acf0fd3f7ac664c98803827ae6de594a9f99563e48c5a2f27eb0", + "sha256:916723458c25dfb77ff07f4c66aed34e47503b2eb3188b3adbec8d8aa6e00f48", + "sha256:9e6ca5d5699bcd89ae605c150aee83b5321f2115695e741b99618f4856c50898", + "sha256:9f5ae84c5c8a857ec44dc180a8b0cc08238e021f57abdf51a8182e915e6299f0", + "sha256:a2b031c2e9b9af485d5e3c4520f4220d74f4d222a5b8dc8c1a3ab9448ca79c57", + "sha256:a61215eac016f391129a013c9e46f3ab308db5f5ec9f25811e811f96962599a8", + "sha256:a740fa0e4087a734455f0fc3abf5e746004c9da72fbd541e9b113013c8dc3282", + "sha256:a9985b214f33311df47e274eb788a5893a761d025e2b92c723ba4c63936b69b1", + "sha256:ab31e908d8424d55601ad7075e471b7d0140d4d3dd3272daf39c5c19d936bd82", + "sha256:ac9dd47af78cae935901a9a500104e2dea2e253207c924cc95de149606dc43cc", + "sha256:addab7e2e1fcc04bd08e4eb631c2a90960c340e40dfc4a5e24d2ff0d5a3b3edb", + "sha256:b1d46dfe3832660f53b13b925d4e0fa1432b00f5f7210eb3ad3bb9a13c6204a6", + "sha256:b2de4c1c0538dcb7010902a2b97f4e00fc4ddf2c8cda9749af0e594d3b7fa3d7", + "sha256:b5ef2f015b95f912c2fcab19c36814963b5463f1fb9049846994b007962743e9", + "sha256:b72d0698f86e8d9ddf9442bdedec15b71df3598199ba33322d9711a19f08145c", + "sha256:bae7de2026cbfe3782c8b78b0db9cbfc5455e079f1937cb0ab8d133496ac55e1", + "sha256:bf22a83f973b50f9d38e55c6aade04c41ddda19b00c4ebc558930d78eecc64ed", + "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c", + "sha256:c396e2cc213d12ce017b686e0f53497f94f8ba2b24799c25d913d46c08ec422c", + "sha256:cb5aaa8c17760909ec6cb15e744c3ebc2ca8918e727216e79607b7bbce9c8f77", + "sha256:cdc793c50be3f01106245a61b739328f7dccc2c648b501e237f0699fe1395b81", + "sha256:d25dd59bbbbb996eacf7be6b4ad082ed7eacc4e8f3d2df1ba43822da9bfa122a", + "sha256:e42b9594cc3bf4d838d67d6ed62b9e59e201862a25e9a157019e171fbe672dd3", + "sha256:e57916ef1bd0fee4f21c4600e9d1da352d8816b52a599c46460e93a6e9f17086", + "sha256:ed40e926fa2f297e8a653c954b732f125ef97bdd4c889f243182299de27e2aa9", + "sha256:ef8108f8dedf204bb7b42994abf93882da1159728a2d4c5e82012edd92c9da9f", + "sha256:f933bbda5a3ee63b8834179096923b094b76f0c7a73c1cfe8f07ad608c58844b", + "sha256:fe5c63197c55bce6385d9aee16c4d0641684628f63ace85f73571e65ad1c1e8d" ], - "version": "==1.0.4" + "version": "==1.0.5" + }, + "packaging": { + "hashes": [ + 
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", + "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" + ], + "markers": "python_version >= '3.7'", + "version": "==23.0" }, "passlib": { "hashes": [ @@ -438,6 +501,14 @@ ], "version": "==1.7.4" }, + "pluggy": { + "hashes": [ + "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", + "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.0" + }, "protobuf": { "hashes": [ "sha256:0c44e01f74109decea196b5b313b08edb5316df77313995594a6981e95674259", @@ -465,6 +536,14 @@ "markers": "python_version >= '3.5'", "version": "==3.18.3" }, + "psycopg": { + "hashes": [ + "sha256:59b4a71536b146925513c0234dfd1dc42b81e65d56ce5335dff4813434dbc113", + "sha256:b1500c42063abaa01d30b056f0b300826b8dd8d586900586029a294ce74af327" + ], + "index": "pypi", + "version": "==3.1.8" + }, "publicsuffix2": { "hashes": [ "sha256:00f8cc31aa8d0d5592a5ced19cccba7de428ebca985db26ac852d920ddd6fe7b", @@ -495,23 +574,22 @@ "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9", "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.21" }, "pyopenssl": { "hashes": [ - "sha256:5e2d8c5e46d0d865ae933bef5230090bdaf5506281e9eec60fa250ee80600cb3", - "sha256:8935bd4920ab9abfebb07c41a4f58296407ed77f04bd1a92914044b848ba1ed6" + "sha256:c1cc5f86bcacefc84dada7d31175cae1b1518d5f60d3d0bb595a67822a868a6f", + "sha256:df5fc28af899e74e19fccb5510df423581047e10ab6f1f4ba1763ff5fde844c0" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==21.0.0" + "markers": "python_version >= '3.6'", + "version": "==23.0.0" }, "pyparsing": { "hashes": [ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", "version": "==2.4.7" }, "pyperclip": { @@ -520,6 +598,46 @@ ], "version": "==1.8.2" }, + "pytest": { + "hashes": [ + "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e", + "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4" + ], + "index": "pypi", + "version": "==7.2.2" + }, + "pytest-asyncio": { + "hashes": [ + "sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36", + "sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442" + ], + "index": "pypi", + "version": "==0.20.3" + }, + "pytest-repeat": { + "hashes": [ + "sha256:4474a7d9e9137f6d8cc8ae297f8c4168d33c56dd740aa78cfffe562557e6b96e", + "sha256:5cd3289745ab3156d43eb9c8e7f7d00a926f3ae5c9cf425bec649b2fe15bad5b" + ], + "index": "pypi", + "version": "==0.9.1" + }, + "pytest-timeout": { + "hashes": [ + "sha256:c07ca07404c612f8abbe22294b23c368e2e5104b521c1790195561f37e1ac3d9", + "sha256:f6f50101443ce70ad325ceb4473c4255e9d74e3c7cd0ef827309dfa4c0d975c6" + ], + "index": "pypi", + "version": "==2.1.0" + }, + "pytest-xdist": { + "hashes": [ + "sha256:1849bd98d8b242b948e472db7478e090bf3361912a8fed87992ed94085f54727", + "sha256:37290d161638a20b672401deef1cba812d110ac27e35d213f091d15b8beb40c9" + ], + "index": "pypi", + "version": "==3.2.1" + }, "ruamel.yaml": { "hashes": [ 
"sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33", @@ -528,13 +646,47 @@ "markers": "python_version >= '3'", "version": "==0.17.16" }, - "six": { + "ruamel.yaml.clib": { "hashes": [ - "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", - "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e", + "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3", + "sha256:184faeaec61dbaa3cace407cffc5819f7b977e75360e8d5ca19461cd851a5fc5", + "sha256:1f08fd5a2bea9c4180db71678e850b995d2a5f4537be0e94557668cf0f5f9497", + "sha256:2aa261c29a5545adfef9296b7e33941f46aa5bbd21164228e833412af4c9c75f", + "sha256:3110a99e0f94a4a3470ff67fc20d3f96c25b13d24c6980ff841e82bafe827cac", + "sha256:3243f48ecd450eddadc2d11b5feb08aca941b5cd98c9b1db14b2fd128be8c697", + "sha256:370445fd795706fd291ab00c9df38a0caed0f17a6fb46b0f607668ecb16ce763", + "sha256:40d030e2329ce5286d6b231b8726959ebbe0404c92f0a578c0e2482182e38282", + "sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94", + "sha256:4a4d8d417868d68b979076a9be6a38c676eca060785abaa6709c7b31593c35d1", + "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072", + "sha256:5bc0667c1eb8f83a3752b71b9c4ba55ef7c7058ae57022dd9b29065186a113d9", + "sha256:721bc4ba4525f53f6a611ec0967bdcee61b31df5a56801281027a3a6d1c2daf5", + "sha256:763d65baa3b952479c4e972669f679fe490eee058d5aa85da483ebae2009d231", + "sha256:7bdb4c06b063f6fd55e472e201317a3bb6cdeeee5d5a38512ea5c01e1acbdd93", + "sha256:8831a2cedcd0f0927f788c5bdf6567d9dc9cc235646a434986a852af1cb54b4b", + "sha256:91a789b4aa0097b78c93e3dc4b40040ba55bef518f84a40d4442f713b4094acb", + "sha256:92460ce908546ab69770b2e576e4f99fbb4ce6ab4b245345a3869a0a0410488f", + "sha256:99e77daab5d13a48a4054803d052ff40780278240a902b880dd37a51ba01a307", + "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8", + "sha256:a7b301ff08055d73223058b5c46c55638917f04d21577c95e00e0c4d79201a6b", + "sha256:be2a7ad8fd8f7442b24323d24ba0b56c51219513cfa45b9ada3b87b76c374d4b", + "sha256:bf9a6bc4a0221538b1a7de3ed7bca4c93c02346853f44e1cd764be0023cd3640", + "sha256:c3ca1fbba4ae962521e5eb66d72998b51f0f4d0f608d3c0347a48e1af262efa7", + "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a", + "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71", + "sha256:d5e51e2901ec2366b79f16c2299a03e74ba4531ddcfacc1416639c557aef0ad8", + "sha256:da538167284de58a52109a9b89b8f6a53ff8437dd6dc26d33b57bf6699153122", + "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7", + "sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80", + "sha256:ecdf1a604009bd35c674b9225a8fa609e0282d9b896c03dd441a91e5f53b534e", + "sha256:efa08d63ef03d079dcae1dfe334f6c8847ba8b645d08df286358b1f5293d24ab", + "sha256:f01da5790e95815eb5a8a138508c01c758e5f5bc0ce4286c4f7028b8dd7ac3d0", + "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646", + "sha256:f6d3d39611ac2e4f62c3128a9eed45f19a6608670c5a2f4f07f24e8de3441d38" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.16.0" + "markers": "platform_python_implementation == 'CPython' and python_version < '3.10'", + "version": "==0.2.7" }, "sortedcontainers": { "hashes": [ @@ -543,6 +695,14 @@ ], "version": "==2.4.0" }, + "tomli": { + "hashes": [ + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + 
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version < '3.11'", + "version": "==2.0.1" + }, "tornado": { "hashes": [ "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", @@ -560,6 +720,14 @@ "markers": "python_version >= '3.7'", "version": "==6.2" }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" + }, "urwid": { "hashes": [ "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae" @@ -568,11 +736,11 @@ }, "werkzeug": { "hashes": [ - "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f", - "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5" + "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe", + "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612" ], "markers": "python_version >= '3.7'", - "version": "==2.2.2" + "version": "==2.2.3" }, "wsproto": { "hashes": [ @@ -695,11 +863,11 @@ }, "flake8-bugbear": { "hashes": [ - "sha256:04a115e5f9c8e87c38bdbbcdf9f58223ffe05469c07c9a7bd8633330bc4d078b", - "sha256:55902ab5a48c5ea53d8689ecd146eda548e72f2724192b9c1d68f6d975d13c06" + "sha256:beb5c7efcd7ccc2039ef66a77bb8db925e7be3531ff1cb4d0b7030d0e2113d72", + "sha256:e3e7f74c8a49ad3794a7183353026dabd68c74030d5f46571f84c1fb0eb79363" ], "index": "pypi", - "version": "==23.1.20" + "version": "==23.3.12" }, "isort": { "hashes": [ @@ -735,19 +903,19 @@ }, "pathspec": { "hashes": [ - "sha256:3a66eb970cbac598f9e5ccb5b2cf58930cd8e3ed86d393d541eaf2d8b1705229", - "sha256:64d338d4e0914e91c1792321e6907b5a593f1ab1851de7fc269557a21b30ebbc" + "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687", + "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293" ], "markers": "python_version >= '3.7'", - "version": "==0.11.0" + "version": "==0.11.1" }, "platformdirs": { "hashes": [ - "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9", - "sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567" + "sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa", + "sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8" ], "markers": "python_version >= '3.7'", - "version": "==3.0.0" + "version": "==3.1.1" }, "pycodestyle": { "hashes": [ @@ -772,6 +940,14 @@ ], "markers": "python_version < '3.11'", "version": "==2.0.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" } } } diff --git a/src/test/regress/README.md b/src/test/regress/README.md index ce6b0b5a6..9793a5d1d 100644 --- a/src/test/regress/README.md +++ b/src/test/regress/README.md @@ -106,10 +106,18 @@ Adding a new test file is quite simple: See [`src/test/regress/spec/README.md`](https://github.com/citusdata/citus/blob/master/src/test/regress/spec/README.md) +## Pytest testing + +See [`src/test/regress/citus_tests/test/README.md`](https://github.com/citusdata/citus/blob/master/src/test/regress/citus_tests/test/README.md) + ## Upgrade testing See 
[`src/test/regress/citus_tests/upgrade/README.md`](https://github.com/citusdata/citus/blob/master/src/test/regress/citus_tests/upgrade/README.md) +## Arbitrary configs testing + +See [`src/test/regress/citus_tests/arbitrary_configs/README.md`](https://github.com/citusdata/citus/blob/master/src/test/regress/citus_tests/arbitrary_configsupgrade/README.md) + ## Failure testing See [`src/test/regress/mitmscripts/README.md`](https://github.com/citusdata/citus/blob/master/src/test/regress/mitmscripts/README.md) diff --git a/src/test/regress/bin/create_test.py b/src/test/regress/bin/create_test.py index c6efde8d8..ee15ecd83 100755 --- a/src/test/regress/bin/create_test.py +++ b/src/test/regress/bin/create_test.py @@ -4,25 +4,26 @@ import os import random import sys -if len(sys.argv) != 2: - print( - "ERROR: Expected the name of the new test as an argument, such as:\n" - "src/test/regress/bin/create_test.py my_awesome_test" - ) - sys.exit(1) +if __name__ == "__main__": + if len(sys.argv) != 2: + print( + "ERROR: Expected the name of the new test as an argument, such as:\n" + "src/test/regress/bin/create_test.py my_awesome_test" + ) + sys.exit(1) -test_name = sys.argv[1] + test_name = sys.argv[1] -regress_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) -filename = os.path.join(regress_dir, "sql", f"{test_name}.sql") + regress_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + filename = os.path.join(regress_dir, "sql", f"{test_name}.sql") -if os.path.isfile(filename): - print(f"ERROR: test file '{filename}' already exists") - sys.exit(1) + if os.path.isfile(filename): + print(f"ERROR: test file '{filename}' already exists") + sys.exit(1) -shard_id = random.randint(1, 999999) * 100 + shard_id = random.randint(1, 999999) * 100 -contents = f"""CREATE SCHEMA {test_name}; + contents = f"""CREATE SCHEMA {test_name}; SET search_path TO {test_name}; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; @@ -34,9 +35,8 @@ SET client_min_messages TO WARNING; DROP SCHEMA {test_name} CASCADE; """ + with open(filename, "w") as f: + f.write(contents) -with open(filename, "w") as f: - f.write(contents) - -print(f"Created {filename}") -print(f"Don't forget to add '{test_name}' in multi_schedule somewhere") + print(f"Created {filename}") + print(f"Don't forget to add '{test_name}' in multi_schedule somewhere") diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index df343a077..2ebb31f47 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -28,6 +28,10 @@ s/\(ref_id\)=\([0-9]+\)/(ref_id)=(X)/g # shard table names for multi_subtransactions s/"t2_[0-9]+"/"t2_xxxxxxx"/g +# shard table names for MERGE tests +s/merge_schema\.([_a-z0-9]+)_40[0-9]+ /merge_schema.\1_xxxxxxx /g +s/pgmerge_schema\.([_a-z0-9]+)_40[0-9]+ /pgmerge_schema.\1_xxxxxxx /g + # shard table names for multi_subquery s/ keyval(1|2|ref)_[0-9]+ / keyval\1_xxxxxxx /g diff --git a/src/test/regress/citus_tests/arbitrary_configs/citus_arbitrary_configs.py b/src/test/regress/citus_tests/arbitrary_configs/citus_arbitrary_configs.py index 1f509d479..6c9863434 100755 --- a/src/test/regress/citus_tests/arbitrary_configs/citus_arbitrary_configs.py +++ b/src/test/regress/citus_tests/arbitrary_configs/citus_arbitrary_configs.py @@ -12,23 +12,23 @@ Options: --seed= random number seed --base whether to use the base sql schedule or not """ +import concurrent.futures +import multiprocessing import os +import random import shutil import sys +import 
time + +from docopt import docopt # https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time/14132912#14132912 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ignore E402 because these imports require addition to path +import common # noqa: E402 -import concurrent.futures -import multiprocessing -import random -import time - -import common -from docopt import docopt - -import config as cfg +import config as cfg # noqa: E402 testResults = {} parallel_thread_amount = 1 @@ -151,14 +151,24 @@ def copy_test_files_with_names(test_names, sql_dir_path, expected_dir_path, conf continue sql_name = os.path.join("./sql", test_name + ".sql") - output_name = os.path.join("./expected", test_name + ".out") - shutil.copy(sql_name, sql_dir_path) - if os.path.isfile(output_name): + + # for a test named , all files: + # .out, _0.out, _1.out ... + # are considered as valid outputs for the test + # by the testing tool (pg_regress) + # so copy such files to the testing directory + output_name = os.path.join("./expected", test_name + ".out") + alt_output_version_no = 0 + while os.path.isfile(output_name): # it might be the first time we run this test and the expected file # might not be there yet, in that case, we don't want to error out # while copying the file. shutil.copy(output_name, expected_dir_path) + output_name = os.path.join( + "./expected", f"{test_name}_{alt_output_version_no}.out" + ) + alt_output_version_no += 1 def run_tests(configs, sql_schedule_name): diff --git a/src/test/regress/citus_tests/common.py b/src/test/regress/citus_tests/common.py index 1654905cb..121166a3e 100644 --- a/src/test/regress/citus_tests/common.py +++ b/src/test/regress/citus_tests/common.py @@ -1,13 +1,48 @@ +import asyncio import atexit import concurrent.futures import os +import platform +import random +import re import shutil +import socket import subprocess import sys +import time +import typing +from abc import ABC, abstractmethod +from contextlib import asynccontextmanager, closing, contextmanager +from datetime import datetime, timedelta +from pathlib import Path +from tempfile import gettempdir +import filelock +import psycopg +import psycopg.sql import utils +from psycopg import sql from utils import USER +LINUX = False +MACOS = False +FREEBSD = False +OPENBSD = False + +if platform.system() == "Linux": + LINUX = True +elif platform.system() == "Darwin": + MACOS = True +elif platform.system() == "FreeBSD": + FREEBSD = True +elif platform.system() == "OpenBSD": + OPENBSD = True + +BSD = MACOS or FREEBSD or OPENBSD + +TIMEOUT_DEFAULT = timedelta(seconds=int(os.getenv("PG_TEST_TIMEOUT_DEFAULT", "10"))) +FORCE_PORTS = os.getenv("PG_FORCE_PORTS", "NO").lower() not in ("no", "0", "n", "") + def initialize_temp_dir(temp_dir): if os.path.exists(temp_dir): @@ -315,8 +350,839 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) -def run(command, *args, shell=True, **kwargs): +def run(command, *args, check=True, shell=True, silent=False, **kwargs): """run runs the given command and prints it to stderr""" - eprint(f"+ {command} ") - return subprocess.run(command, *args, check=True, shell=shell, **kwargs) + if not silent: + eprint(f"+ {command} ") + if silent: + kwargs.setdefault("stdout", subprocess.DEVNULL) + return subprocess.run(command, *args, check=check, shell=shell, **kwargs) + + +def capture(command, *args, **kwargs): + """runs the given command and returns its output as a string""" + return run(command, *args, stdout=subprocess.PIPE, 
text=True, **kwargs).stdout + + +def sudo(command, *args, shell=True, **kwargs): + """ + A version of run that prefixes the command with sudo when the process is + not already run as root + """ + effective_user_id = os.geteuid() + if effective_user_id == 0: + return run(command, *args, shell=shell, **kwargs) + if shell: + return run(f"sudo {command}", *args, shell=shell, **kwargs) + else: + return run(["sudo", *command]) + + +# this is out of ephemeral port range for many systems hence +# it is a lower chance that it will conflict with "in-use" ports +PORT_LOWER_BOUND = 10200 + +# ephemeral port start on many Linux systems +PORT_UPPER_BOUND = 32768 + +next_port = PORT_LOWER_BOUND + + +def cleanup_test_leftovers(nodes): + """ + Cleaning up test leftovers needs to be done in a specific order, because + some of these leftovers depend on others having been removed. They might + even depend on leftovers on other nodes being removed. So this takes a list + of nodes, so that we can clean up all test leftovers globally in the + correct order. + """ + for node in nodes: + node.cleanup_subscriptions() + + for node in nodes: + node.cleanup_publications() + + for node in nodes: + node.cleanup_logical_replication_slots() + + for node in nodes: + node.cleanup_schemas() + + for node in nodes: + node.cleanup_users() + + +class PortLock: + """PortLock allows you to take a lock an a specific port. + + While a port is locked by one process, other processes using PortLock won't + get the same port. + """ + + def __init__(self): + global next_port + first_port = next_port + while True: + next_port += 1 + if next_port >= PORT_UPPER_BOUND: + next_port = PORT_LOWER_BOUND + + # avoid infinite loop + if first_port == next_port: + raise Exception("Could not find port") + + self.lock = filelock.FileLock(Path(gettempdir()) / f"port-{next_port}.lock") + try: + self.lock.acquire(timeout=0) + except filelock.Timeout: + continue + + if FORCE_PORTS: + self.port = next_port + break + + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + try: + s.bind(("127.0.0.1", next_port)) + self.port = next_port + break + except Exception: + self.lock.release() + continue + + def release(self): + """Call release when you are done with the port. + + This way other processes can use it again. + """ + self.lock.release() + + +class QueryRunner(ABC): + """A subclassable interface class that can be used to run queries. + + This is mostly useful to be generic across differnt types of things that + implement the Postgres interface, such as Postgres, PgBouncer, or a Citus + cluster. + + This implements some helpers send queries in a simpler manner than psycopg + allows by default. + """ + + @abstractmethod + def set_default_connection_options(self, options: dict[str, typing.Any]): + """Sets the default connection options on the given options dictionary + + This is the only method that the class that subclasses QueryRunner + needs to implement. + """ + ... 
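+
+    # NOTE (editorial sketch, not part of the original patch): a concrete
+    # QueryRunner subclass only has to supply connection defaults. A
+    # hypothetical minimal implementation could look roughly like:
+    #
+    #   class StandalonePostgres(QueryRunner):
+    #       def __init__(self, host, port):
+    #           self.host = host
+    #           self.port = port
+    #
+    #       def set_default_connection_options(self, options):
+    #           options.setdefault("host", self.host)
+    #           options.setdefault("port", self.port)
+    #           options.setdefault("dbname", "postgres")
+    #
+    # All query helpers below (sql, psql, poll_query_until, ...) are then
+    # inherited unchanged; the Postgres class further down in this file is
+    # the real in-tree example of this pattern.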
+ + def make_conninfo(self, **kwargs) -> str: + self.set_default_connection_options(kwargs) + return psycopg.conninfo.make_conninfo(**kwargs) + + def conn(self, *, autocommit=True, **kwargs): + """Open a psycopg connection to this server""" + self.set_default_connection_options(kwargs) + return psycopg.connect( + autocommit=autocommit, + **kwargs, + ) + + def aconn(self, *, autocommit=True, **kwargs): + """Open an asynchronous psycopg connection to this server""" + self.set_default_connection_options(kwargs) + return psycopg.AsyncConnection.connect( + autocommit=autocommit, + **kwargs, + ) + + @contextmanager + def cur(self, autocommit=True, **kwargs): + """Open an psycopg cursor to this server + + The connection and the cursors automatically close once you leave the + "with" block + """ + with self.conn( + autocommit=autocommit, + **kwargs, + ) as conn: + with conn.cursor() as cur: + yield cur + + @asynccontextmanager + async def acur(self, **kwargs): + """Open an asynchronous psycopg cursor to this server + + The connection and the cursors automatically close once you leave the + "async with" block + """ + async with await self.aconn(**kwargs) as conn: + async with conn.cursor() as cur: + yield cur + + def sql(self, query, params=None, **kwargs): + """Run an SQL query + + This opens a new connection and closes it once the query is done + """ + with self.cur(**kwargs) as cur: + cur.execute(query, params=params) + + def sql_value(self, query, params=None, allow_empty_result=False, **kwargs): + """Run an SQL query that returns a single cell and return this value + + This opens a new connection and closes it once the query is done + """ + with self.cur(**kwargs) as cur: + cur.execute(query, params=params) + result = cur.fetchall() + + if allow_empty_result and len(result) == 0: + return None + + assert len(result) == 1 + assert len(result[0]) == 1 + value = result[0][0] + return value + + def asql(self, query, **kwargs): + """Run an SQL query in asynchronous task + + This opens a new connection and closes it once the query is done + """ + return asyncio.ensure_future(self.asql_coroutine(query, **kwargs)) + + async def asql_coroutine( + self, query, params=None, **kwargs + ) -> typing.Optional[typing.List[typing.Any]]: + async with self.acur(**kwargs) as cur: + await cur.execute(query, params=params) + try: + return await cur.fetchall() + except psycopg.ProgrammingError as e: + if "the last operation didn't produce a result" == str(e): + return None + raise + + def psql(self, query, **kwargs): + """Run an SQL query using psql instead of psycopg + + This opens a new connection and closes it once the query is done + """ + + conninfo = self.make_conninfo(**kwargs) + + run( + ["psql", "-X", f"{conninfo}", "-c", query], + shell=False, + silent=True, + ) + + def poll_query_until(self, query, params=None, expected=True, **kwargs): + """Run query repeatedly until it returns the expected result""" + start = datetime.now() + result = None + + while start + TIMEOUT_DEFAULT > datetime.now(): + result = self.sql_value( + query, params=params, allow_empty_result=True, **kwargs + ) + if result == expected: + return + + time.sleep(0.1) + + raise Exception( + f"Timeout reached while polling query, last result was: {result}" + ) + + @contextmanager + def transaction(self, **kwargs): + with self.cur(**kwargs) as cur: + with cur.connection.transaction(): + yield cur + + def sleep(self, duration=3, **kwargs): + """Run pg_sleep""" + return self.sql(f"select pg_sleep({duration})", **kwargs) + + def asleep(self, 
duration=3, times=1, sequentially=False, **kwargs): + """Run pg_sleep asynchronously in a task. + + times: + You can create a single task that opens multiple connections, which + run pg_sleep concurrently. The asynchronous task will only complete + once all these pg_sleep calls are finished. + sequentially: + Instead of running all pg_sleep calls spawned by providing + times > 1 concurrently, this will run them sequentially. + """ + return asyncio.ensure_future( + self.asleep_coroutine( + duration=duration, times=times, sequentially=sequentially, **kwargs + ) + ) + + async def asleep_coroutine(self, duration=3, times=1, sequentially=False, **kwargs): + """This is the coroutine that the asleep task runs internally""" + if not sequentially: + await asyncio.gather( + *[ + self.asql(f"select pg_sleep({duration})", **kwargs) + for _ in range(times) + ] + ) + else: + for _ in range(times): + await self.asql(f"select pg_sleep({duration})", **kwargs) + + def test(self, **kwargs): + """Test if you can connect""" + return self.sql("select 1", **kwargs) + + def atest(self, **kwargs): + """Test if you can connect asynchronously""" + return self.asql("select 1", **kwargs) + + def psql_test(self, **kwargs): + """Test if you can connect with psql instead of psycopg""" + return self.psql("select 1", **kwargs) + + def debug(self): + print("Connect manually to:\n ", repr(self.make_conninfo())) + print("Press Enter to continue running the test...") + input() + + def psql_debug(self, **kwargs): + conninfo = self.make_conninfo(**kwargs) + run( + ["psql", f"{conninfo}"], + shell=False, + silent=True, + ) + + +class Postgres(QueryRunner): + """A class that represents a Postgres instance on this machine + + You can query it by using the interface provided by QueryRunner or use many + of the helper methods. + """ + + def __init__(self, pgdata): + self.port_lock = PortLock() + + # These values should almost never be changed after initialization + self.host = "127.0.0.1" + self.port = self.port_lock.port + + # These values can be changed when needed + self.dbname = "postgres" + self.user = "postgres" + self.schema = None + + self.pgdata = pgdata + self.log_path = self.pgdata / "pg.log" + + # Used to track objects that we want to clean up at the end of a test + self.subscriptions = set() + self.publications = set() + self.logical_replication_slots = set() + self.schemas = set() + self.users = set() + + def set_default_connection_options(self, options): + options.setdefault("host", self.host) + options.setdefault("port", self.port) + options.setdefault("dbname", self.dbname) + options.setdefault("user", self.user) + if self.schema is not None: + options.setdefault("options", f"-c search_path={self.schema}") + options.setdefault("connect_timeout", 3) + # needed for Ubuntu 18.04 + options.setdefault("client_encoding", "UTF8") + + def initdb(self): + run( + f"initdb -A trust --nosync --username postgres --pgdata {self.pgdata} --allow-group-access --encoding UTF8 --locale POSIX", + stdout=subprocess.DEVNULL, + ) + + with self.conf_path.open(mode="a") as pgconf: + # Allow connecting over unix sockets + pgconf.write("unix_socket_directories = '/tmp'\n") + + # Useful logs for debugging issues + pgconf.write("log_replication_commands = on\n") + # The following to are also useful for debugging, but quite noisy. + # So better to enable them manually by uncommenting. 
+ # pgconf.write("log_connections = on\n") + # pgconf.write("log_disconnections = on\n") + + # Enable citus + pgconf.write("shared_preload_libraries = 'citus'\n") + + # Allow CREATE SUBSCRIPTION to work + pgconf.write("wal_level = 'logical'\n") + # Faster logical replication status update so tests with logical replication + # run faster + pgconf.write("wal_receiver_status_interval = 1\n") + + # Faster logical replication apply worker launch so tests with logical + # replication run faster. This is used in ApplyLauncherMain in + # src/backend/replication/logical/launcher.c. + pgconf.write("wal_retrieve_retry_interval = '250ms'\n") + + # Make sure there's enough logical replication resources for most + # of our tests + pgconf.write("max_logical_replication_workers = 50\n") + pgconf.write("max_wal_senders = 50\n") + pgconf.write("max_worker_processes = 50\n") + pgconf.write("max_replication_slots = 50\n") + + # We need to make the log go to stderr so that the tests can + # check what is being logged. This should be the default, but + # some packagings change the default configuration. + pgconf.write("log_destination = stderr\n") + # We don't need the logs anywhere else than stderr + pgconf.write("logging_collector = off\n") + + # This makes tests run faster and we don't care about crash safety + # of our test data. + pgconf.write("fsync = false\n") + + # conservative settings to ensure we can run multiple postmasters: + pgconf.write("shared_buffers = 1MB\n") + # limit disk space consumption, too: + pgconf.write("max_wal_size = 128MB\n") + + # don't restart after crashes to make it obvious that a crash + # happened + pgconf.write("restart_after_crash = off\n") + + os.truncate(self.hba_path, 0) + self.ssl_access("all", "trust") + self.nossl_access("all", "trust") + self.commit_hba() + + def init_with_citus(self): + self.initdb() + self.start() + self.sql("CREATE EXTENSION citus") + + # Manually turn on ssl, so that we can safely truncate + # postgresql.auto.conf later. We can only do this after creating the + # citus extension because that creates the self signed certificates. + with self.conf_path.open(mode="a") as pgconf: + pgconf.write("ssl = on\n") + + def pgctl(self, command, **kwargs): + run(f"pg_ctl -w --pgdata {self.pgdata} {command}", **kwargs) + + def apgctl(self, command, **kwargs): + return asyncio.create_subprocess_shell( + f"pg_ctl -w --pgdata {self.pgdata} {command}", **kwargs + ) + + def start(self): + try: + self.pgctl(f'-o "-p {self.port}" -l {self.log_path} start') + except Exception: + print(f"\n\nPG_LOG: {self.pgdata}\n") + with self.log_path.open() as f: + print(f.read()) + raise + + def stop(self, mode="fast"): + self.pgctl(f"-m {mode} stop", check=False) + + def cleanup(self): + self.stop() + self.port_lock.release() + + def restart(self): + self.stop() + self.start() + + def reload(self): + self.pgctl("reload") + # Sadly UNIX signals are asynchronous, so we sleep a bit and hope that + # Postgres actually processed the SIGHUP signal after the sleep. 
+ time.sleep(0.1) + + async def arestart(self): + process = await self.apgctl("-m fast restart") + await process.communicate() + + def nossl_access(self, dbname, auth_type): + """Prepends a local non-SSL access to the HBA file""" + with self.hba_path.open() as pghba: + old_contents = pghba.read() + with self.hba_path.open(mode="w") as pghba: + pghba.write(f"local {dbname} all {auth_type}\n") + pghba.write(f"hostnossl {dbname} all 127.0.0.1/32 {auth_type}\n") + pghba.write(f"hostnossl {dbname} all ::1/128 {auth_type}\n") + pghba.write(old_contents) + + def ssl_access(self, dbname, auth_type): + """Prepends a local SSL access rule to the HBA file""" + with self.hba_path.open() as pghba: + old_contents = pghba.read() + with self.hba_path.open(mode="w") as pghba: + pghba.write(f"hostssl {dbname} all 127.0.0.1/32 {auth_type}\n") + pghba.write(f"hostssl {dbname} all ::1/128 {auth_type}\n") + pghba.write(old_contents) + + @property + def hba_path(self): + return self.pgdata / "pg_hba.conf" + + @property + def conf_path(self): + return self.pgdata / "postgresql.conf" + + def commit_hba(self): + """Mark the current HBA contents as non-resetable by reset_hba""" + with self.hba_path.open() as pghba: + old_contents = pghba.read() + with self.hba_path.open(mode="w") as pghba: + pghba.write("# committed-rules\n") + pghba.write(old_contents) + + def reset_hba(self): + """Remove any HBA rules that were added after the last call to commit_hba""" + with self.hba_path.open() as f: + hba_contents = f.read() + committed = hba_contents[hba_contents.find("# committed-rules\n") :] + with self.hba_path.open("w") as f: + f.write(committed) + + def prepare_reset(self): + """Prepares all changes to reset Postgres settings and objects + + To actually apply the prepared changes a restart might still be needed. + """ + self.reset_hba() + os.truncate(self.pgdata / "postgresql.auto.conf", 0) + + def reset(self): + """Resets any changes to Postgres settings from previous tests""" + self.prepare_reset() + self.restart() + + async def delayed_start(self, delay=1): + """Start Postgres after a delay + + NOTE: The sleep is asynchronous, but while waiting for Postgres to + start the pg_ctl start command will block the event loop. This is + currently acceptable for our usage of this method in the existing + tests and this way it was easiest to implement. However, it seems + totally reasonable to change this behaviour in the future if necessary. + """ + await asyncio.sleep(delay) + self.start() + + def configure(self, *configs): + """Configure specific Postgres settings using ALTER SYSTEM SET + + NOTE: after configuring a call to reload or restart is needed for the + settings to become effective. + """ + for config in configs: + self.sql(f"alter system set {config}") + + def log_handle(self): + """Returns the opened logfile at the current end of the log + + By later calling read on this file you can read the contents that were + written from this moment on. + + IMPORTANT: This handle should be closed once it's not needed anymore + """ + f = self.log_path.open() + f.seek(0, os.SEEK_END) + return f + + @contextmanager + def log_contains(self, re_string, times=None): + """Checks if during this with block the log matches re_string + + re_string: + The regex to search for. + times: + If None, any number of matches is accepted. If a number, only that + specific number of matches is accepted. 
+ """ + with self.log_handle() as f: + yield + content = f.read() + if times is None: + assert re.search(re_string, content) + else: + match_count = len(re.findall(re_string, content)) + assert match_count == times + + def create_user(self, name, args: typing.Optional[psycopg.sql.Composable] = None): + self.users.add(name) + if args is None: + args = sql.SQL("") + self.sql(sql.SQL("CREATE USER {} {}").format(sql.Identifier(name), args)) + + def create_schema(self, name): + self.schemas.add(name) + self.sql(sql.SQL("CREATE SCHEMA {}").format(sql.Identifier(name))) + + def create_publication(self, name: str, args: psycopg.sql.Composable): + self.publications.add(name) + self.sql(sql.SQL("CREATE PUBLICATION {} {}").format(sql.Identifier(name), args)) + + def create_logical_replication_slot( + self, name, plugin, temporary=False, twophase=False + ): + self.logical_replication_slots.add(name) + self.sql( + "SELECT pg_catalog.pg_create_logical_replication_slot(%s,%s,%s,%s)", + (name, plugin, temporary, twophase), + ) + + def create_subscription(self, name: str, args: psycopg.sql.Composable): + self.subscriptions.add(name) + self.sql( + sql.SQL("CREATE SUBSCRIPTION {} {}").format(sql.Identifier(name), args) + ) + + def cleanup_users(self): + for user in self.users: + self.sql(sql.SQL("DROP USER IF EXISTS {}").format(sql.Identifier(user))) + + def cleanup_schemas(self): + for schema in self.schemas: + self.sql( + sql.SQL("DROP SCHEMA IF EXISTS {} CASCADE").format( + sql.Identifier(schema) + ) + ) + + def cleanup_publications(self): + for publication in self.publications: + self.sql( + sql.SQL("DROP PUBLICATION IF EXISTS {}").format( + sql.Identifier(publication) + ) + ) + + def cleanup_logical_replication_slots(self): + for slot in self.logical_replication_slots: + self.sql( + "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = %s", + (slot,), + ) + + def cleanup_subscriptions(self): + for subscription in self.subscriptions: + try: + self.sql( + sql.SQL("ALTER SUBSCRIPTION {} DISABLE").format( + sql.Identifier(subscription) + ) + ) + except psycopg.errors.UndefinedObject: + # Subscription didn't exist already + continue + self.sql( + sql.SQL("ALTER SUBSCRIPTION {} SET (slot_name = NONE)").format( + sql.Identifier(subscription) + ) + ) + self.sql( + sql.SQL("DROP SUBSCRIPTION {}").format(sql.Identifier(subscription)) + ) + + def lsn(self, mode): + """Returns the lsn for the given mode""" + queries = { + "insert": "SELECT pg_current_wal_insert_lsn()", + "flush": "SELECT pg_current_wal_flush_lsn()", + "write": "SELECT pg_current_wal_lsn()", + "receive": "SELECT pg_last_wal_receive_lsn()", + "replay": "SELECT pg_last_wal_replay_lsn()", + } + return self.sql_value(queries[mode]) + + def wait_for_catchup(self, subscription_name, mode="replay", target_lsn=None): + """Waits until the subscription has caught up""" + if target_lsn is None: + target_lsn = self.lsn("write") + + # Before release 12 walreceiver just set the application name to + # "walreceiver" + self.poll_query_until( + sql.SQL( + """ + SELECT {} <= {} AND state = 'streaming' + FROM pg_catalog.pg_stat_replication + WHERE application_name IN ({}, 'walreceiver') + """ + ).format(target_lsn, sql.Identifier(f"{mode}_lsn"), subscription_name) + ) + + @contextmanager + def _enable_firewall(self): + """Enables the firewall for the platform that you are running + + Normally this should not be called directly, and instead drop_traffic + or reject_traffic should be used. 
+ """ + fw_token = None + if BSD: + if MACOS: + command_stderr = sudo( + "pfctl -E", stderr=subprocess.PIPE, text=True + ).stderr + match = re.search(r"^Token : (\d+)", command_stderr, flags=re.MULTILINE) + assert match is not None + fw_token = match.group(1) + sudo( + 'bash -c "' + f"echo 'anchor \\\"port_{self.port}\\\"'" + f' | pfctl -a citus_test -f -"' + ) + try: + yield + finally: + if MACOS: + sudo(f"pfctl -X {fw_token}") + + @contextmanager + def drop_traffic(self): + """Drops all TCP packets to this query runner""" + with self._enable_firewall(): + if LINUX: + sudo( + "iptables --append OUTPUT " + "--protocol tcp " + f"--destination {self.host} " + f"--destination-port {self.port} " + "--jump DROP " + ) + elif BSD: + sudo( + "bash -c '" + f'echo "block drop out proto tcp from any to {self.host} port {self.port}"' + f"| pfctl -a citus_test/port_{self.port} -f -'" + ) + else: + raise Exception("This OS cannot run this test") + try: + yield + finally: + if LINUX: + sudo( + "iptables --delete OUTPUT " + "--protocol tcp " + f"--destination {self.host} " + f"--destination-port {self.port} " + "--jump DROP " + ) + elif BSD: + sudo(f"pfctl -a citus_test/port_{self.port} -F all") + + @contextmanager + def reject_traffic(self): + """Rejects all traffic to this query runner with a TCP RST message""" + with self._enable_firewall(): + if LINUX: + sudo( + "iptables --append OUTPUT " + "--protocol tcp " + f"--destination {self.host} " + f"--destination-port {self.port} " + "--jump REJECT " + "--reject-with tcp-reset" + ) + elif BSD: + sudo( + "bash -c '" + f'echo "block return-rst out out proto tcp from any to {self.host} port {self.port}"' + f"| pfctl -a citus_test/port_{self.port} -f -'" + ) + else: + raise Exception("This OS cannot run this test") + try: + yield + finally: + if LINUX: + sudo( + "iptables --delete OUTPUT " + "--protocol tcp " + f"--destination {self.host} " + f"--destination-port {self.port} " + "--jump REJECT " + "--reject-with tcp-reset" + ) + elif BSD: + sudo(f"pfctl -a citus_test/port_{self.port} -F all") + + +class CitusCluster(QueryRunner): + """A class that represents a Citus cluster on this machine + + The nodes in the cluster can be accessed directly using the coordinator, + workers, and nodes properties. + + If it doesn't matter which of the nodes in the cluster is used to run a + query, then you can use the methods provided by QueryRunner directly on the + cluster. In that case a random node will be chosen to run your query. 
+ """ + + def __init__(self, basedir: Path, worker_count: int): + self.coordinator = Postgres(basedir / "coordinator") + self.workers = [Postgres(basedir / f"worker{i}") for i in range(worker_count)] + self.nodes = [self.coordinator] + self.workers + self._schema = None + self.failed_reset = False + + parallel_run(Postgres.init_with_citus, self.nodes) + with self.coordinator.cur() as cur: + cur.execute( + "SELECT pg_catalog.citus_set_coordinator_host(%s, %s)", + (self.coordinator.host, self.coordinator.port), + ) + for worker in self.workers: + cur.execute( + "SELECT pg_catalog.citus_add_node(%s, %s)", + (worker.host, worker.port), + ) + + def set_default_connection_options(self, options): + random.choice(self.nodes).set_default_connection_options(options) + + @property + def schema(self): + return self._schema + + @schema.setter + def schema(self, value): + self._schema = value + for node in self.nodes: + node.schema = value + + def reset(self): + """Resets any changes to Postgres settings from previous tests""" + parallel_run(Postgres.prepare_reset, self.nodes) + parallel_run(Postgres.restart, self.nodes) + + def cleanup(self): + parallel_run(Postgres.cleanup, self.nodes) + + def debug(self): + """Print information to stdout to help with debugging your cluster""" + print("The nodes in this cluster and their connection strings are:") + for node in self.nodes: + print(f"{node.pgdata}:\n ", repr(node.make_conninfo())) + print("Press Enter to continue running the test...") + input() diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index 6ce58d480..9c180271f 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from __future__ import annotations import argparse import os @@ -7,162 +8,329 @@ import random import re import shutil import sys -from glob import glob +from collections import OrderedDict +from typing import Optional import common -import config - -args = argparse.ArgumentParser() -args.add_argument( - "test_name", help="Test name (must be included in a schedule.)", nargs="?" 
-) -args.add_argument( - "-p", - "--path", - required=False, - help="Relative path for test file (must have a .sql or .spec extension)", - type=pathlib.Path, -) -args.add_argument("-r", "--repeat", help="Number of test to run", type=int, default=1) -args.add_argument( - "-b", - "--use-base-schedule", - required=False, - help="Choose base-schedules rather than minimal-schedules", - action="store_true", -) -args.add_argument( - "-w", - "--use-whole-schedule-line", - required=False, - help="Use the whole line found in related schedule", - action="store_true", -) -args.add_argument( - "--valgrind", - required=False, - help="Run the test with valgrind enabled", - action="store_true", -) - -args = vars(args.parse_args()) - -regress_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) -test_file_path = args["path"] -test_file_name = args["test_name"] -use_base_schedule = args["use_base_schedule"] -use_whole_schedule_line = args["use_whole_schedule_line"] - -test_files_to_skip = [ - "multi_cluster_management", - "multi_extension", - "multi_test_helpers", - "multi_insert_select", -] -test_files_to_run_without_schedule = ["single_node_enterprise"] - -if not (test_file_name or test_file_path): - print("FATAL: No test given.") - sys.exit(2) +from config import ARBITRARY_SCHEDULE_NAMES, MASTER_VERSION, CitusDefaultClusterConfig -if test_file_path: - test_file_path = os.path.join(os.getcwd(), args["path"]) +# Returns true if given test_schedule_line is of the form: +# "test: upgrade_ ... _after .." +def schedule_line_is_upgrade_after(test_schedule_line: str) -> bool: + return ( + test_schedule_line.startswith("test: upgrade_") + and "_after" in test_schedule_line + ) - if not os.path.isfile(test_file_path): - print(f"ERROR: test file '{test_file_path}' does not exist") + +def run_python_test(test_file_name, repeat): + """Runs the test using pytest + + This function never returns as it usese os.execlp to replace the current + process with a new pytest process. + """ + test_path = regress_dir / "citus_tests" / "test" / f"{test_file_name}.py" + if not test_path.exists(): + raise Exception("Test could not be found in any schedule") + + os.execlp( + "pytest", + "pytest", + "--numprocesses", + "auto", + "--count", + str(repeat), + str(test_path), + ) + + +def run_schedule_with_python(schedule): + bindir = common.capture("pg_config --bindir").rstrip() + pgxs_path = pathlib.Path(common.capture("pg_config --pgxs").rstrip()) + + os.chdir(regress_dir) + os.environ["PATH"] = str(regress_dir / "bin") + os.pathsep + os.environ["PATH"] + os.environ["PG_REGRESS_DIFF_OPTS"] = "-dU10 -w" + os.environ["CITUS_OLD_VERSION"] = f"v{MASTER_VERSION}.0" + + args = { + "--pgxsdir": str(pgxs_path.parent.parent.parent), + "--bindir": bindir, + } + + config = CitusDefaultClusterConfig(args) + common.initialize_temp_dir(config.temp_dir) + common.initialize_citus_cluster( + config.bindir, config.datadir, config.settings, config + ) + common.run_pg_regress( + config.bindir, config.pg_srcdir, config.coordinator_port(), schedule + ) + + +if __name__ == "__main__": + args = argparse.ArgumentParser() + args.add_argument( + "test_name", help="Test name (must be included in a schedule.)", nargs="?" 
+ ) + args.add_argument( + "-p", + "--path", + required=False, + help="Relative path for test file (must have a .sql or .spec extension)", + type=pathlib.Path, + ) + args.add_argument( + "-r", "--repeat", help="Number of test to run", type=int, default=1 + ) + args.add_argument( + "-b", + "--use-base-schedule", + required=False, + help="Choose base-schedules rather than minimal-schedules", + action="store_true", + ) + args.add_argument( + "-w", + "--use-whole-schedule-line", + required=False, + help="Use the whole line found in related schedule", + action="store_true", + ) + args.add_argument( + "--valgrind", + required=False, + help="Run the test with valgrind enabled", + action="store_true", + ) + + args = vars(args.parse_args()) + + regress_dir = pathlib.Path( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + ) + test_file_path = args["path"] + test_file_name = args["test_name"] + use_base_schedule = args["use_base_schedule"] + use_whole_schedule_line = args["use_whole_schedule_line"] + + class TestDeps: + schedule: Optional[str] + direct_extra_tests: list[str] + + def __init__(self, schedule, extra_tests=None, repeatable=True, worker_count=2): + self.schedule = schedule + self.direct_extra_tests = extra_tests or [] + self.repeatable = repeatable + self.worker_count = worker_count + + def extra_tests(self): + all_deps = OrderedDict() + for direct_dep in self.direct_extra_tests: + if direct_dep in deps: + for indirect_dep in deps[direct_dep].extra_tests(): + all_deps[indirect_dep] = True + all_deps[direct_dep] = True + + return list(all_deps.keys()) + + deps = { + "multi_cluster_management": TestDeps( + None, ["multi_test_helpers_superuser"], repeatable=False + ), + "create_role_propagation": TestDeps(None, ["multi_cluster_management"]), + "single_node_enterprise": TestDeps(None), + "single_node": TestDeps(None), + "single_node_truncate": TestDeps(None), + "multi_extension": TestDeps(None, repeatable=False), + "multi_test_helpers": TestDeps(None), + "multi_insert_select": TestDeps("base_schedule"), + "multi_mx_create_table": TestDeps( + None, + [ + "multi_test_helpers_superuser", + "multi_mx_node_metadata", + "multi_cluster_management", + "multi_mx_function_table_reference", + ], + ), + "background_rebalance_parallel": TestDeps( + None, + [ + "multi_test_helpers", + "multi_cluster_management", + ], + worker_count=6, + ), + "multi_mx_modifying_xacts": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_router_planner": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_copy_data": TestDeps(None, ["multi_mx_create_table"]), + "multi_mx_schema_support": TestDeps(None, ["multi_mx_copy_data"]), + "multi_simple_queries": TestDeps("base_schedule"), + } + + if not (test_file_name or test_file_path): + print("FATAL: No test given.") sys.exit(2) - test_file_extension = pathlib.Path(test_file_path).suffix - test_file_name = pathlib.Path(test_file_path).stem + if test_file_path: + test_file_path = os.path.join(os.getcwd(), args["path"]) - if test_file_extension not in ".spec.sql": - print( - "ERROR: Unrecognized test extension. Valid extensions are: .sql and .spec" + if not os.path.isfile(test_file_path): + print(f"ERROR: test file '{test_file_path}' does not exist") + sys.exit(2) + + test_file_extension = pathlib.Path(test_file_path).suffix + test_file_name = pathlib.Path(test_file_path).stem + + if test_file_extension not in (".spec", ".sql", ".py"): + print( + "ERROR: Unrecognized test extension. 
Valid extensions are: .sql, .spec, and .py" + ) + sys.exit(1) + + test_schedule = "" + dependencies = [] + + if test_file_name.startswith("test_"): + run_python_test(test_file_name, args["repeat"]) + + # find related schedule + for schedule_file_path in sorted(regress_dir.glob("*_schedule")): + for schedule_line in open(schedule_file_path, "r"): + if re.search(r"\b" + test_file_name + r"\b", schedule_line): + test_schedule = pathlib.Path(schedule_file_path).stem + if use_whole_schedule_line: + test_schedule_line = schedule_line + else: + test_schedule_line = f"test: {test_file_name}\n" + break + else: + continue + break + else: + raise Exception("Test could not be found in any schedule") + + def default_base_schedule(test_schedule): + if "isolation" in test_schedule: + return "base_isolation_schedule" + + if "failure" in test_schedule: + return "failure_base_schedule" + + if "enterprise" in test_schedule: + return "enterprise_minimal_schedule" + + if "split" in test_schedule: + return "minimal_schedule" + + if "mx" in test_schedule: + if use_base_schedule: + return "mx_base_schedule" + return "mx_minimal_schedule" + + if "operations" in test_schedule: + return "minimal_schedule" + + if "after_citus_upgrade" in test_schedule: + print( + f"WARNING: After citus upgrade schedule ({test_schedule}) is not supported." + ) + sys.exit(0) + + if "citus_upgrade" in test_schedule: + return None + + if "pg_upgrade" in test_schedule: + return "minimal_schedule" + + if test_schedule in ARBITRARY_SCHEDULE_NAMES: + print( + f"WARNING: Arbitrary config schedule ({test_schedule}) is not supported." + ) + sys.exit(0) + + if use_base_schedule: + return "base_schedule" + return "minimal_schedule" + + # we run the tests with 2 workers by default. + # If we find any dependency which requires more workers, we update the worker count. 
+ def worker_count_for(test_name): + if test_name in deps: + return deps[test_name].worker_count + return 2 + + test_worker_count = max(worker_count_for(test_file_name), 2) + + if test_file_name in deps: + dependencies = deps[test_file_name] + elif schedule_line_is_upgrade_after(test_schedule_line): + dependencies = TestDeps( + default_base_schedule(test_schedule), + [test_file_name.replace("_after", "_before")], ) - sys.exit(1) - -# early exit if it's a test that needs to be skipped -if test_file_name in test_files_to_skip: - print(f"WARNING: Skipping exceptional test: '{test_file_name}'") - sys.exit(0) - -test_schedule = "" - -# find related schedule -for schedule_file_path in sorted(glob(os.path.join(regress_dir, "*_schedule"))): - for schedule_line in open(schedule_file_path, "r"): - if re.search(r"\b" + test_file_name + r"\b", schedule_line): - test_schedule = pathlib.Path(schedule_file_path).stem - if use_whole_schedule_line: - test_schedule_line = schedule_line - else: - test_schedule_line = f"test: {test_file_name}\n" - break else: - continue - break + dependencies = TestDeps(default_base_schedule(test_schedule)) -# map suitable schedule -if not test_schedule: - print(f"WARNING: Could not find any schedule for '{test_file_name}'") - sys.exit(0) -elif "isolation" in test_schedule: - test_schedule = "base_isolation_schedule" -elif "failure" in test_schedule: - test_schedule = "failure_base_schedule" -elif "enterprise" in test_schedule: - test_schedule = "enterprise_minimal_schedule" -elif "split" in test_schedule: - test_schedule = "minimal_schedule" -elif "mx" in test_schedule: - if use_base_schedule: - test_schedule = "mx_base_schedule" + if "before_" in test_schedule: + dependencies.repeatable = False + + # copy base schedule to a temp file and append test_schedule_line + # to be able to run tests in parallel (if test_schedule_line is a parallel group.) 
+ tmp_schedule_path = os.path.join( + regress_dir, f"tmp_schedule_{ random.randint(1, 10000)}" + ) + # some tests don't need a schedule to run + # e.g tests that are in the first place in their own schedule + if dependencies.schedule: + shutil.copy2( + os.path.join(regress_dir, dependencies.schedule), tmp_schedule_path + ) + with open(tmp_schedule_path, "a") as myfile: + for dependency in dependencies.extra_tests(): + myfile.write(f"test: {dependency}\n") + test_worker_count = max(worker_count_for(dependency), test_worker_count) + + repetition_cnt = args["repeat"] + if repetition_cnt > 1 and not dependencies.repeatable: + repetition_cnt = 1 + print(f"WARNING: Cannot repeatably run this test: '{test_file_name}'") + for _ in range(repetition_cnt): + myfile.write(test_schedule_line) + + if "upgrade" in test_schedule_line: + try: + run_schedule_with_python(pathlib.Path(tmp_schedule_path).stem) + finally: + # remove temp schedule file + os.remove(tmp_schedule_path) + sys.exit(0) + + # find suitable make recipe + if dependencies.schedule == "base_isolation_schedule": + make_recipe = "check-isolation-custom-schedule" + elif dependencies.schedule == "failure_base_schedule": + make_recipe = "check-failure-custom-schedule" else: - test_schedule = "mx_minimal_schedule" -elif "operations" in test_schedule: - test_schedule = "minimal_schedule" -elif test_schedule in config.ARBITRARY_SCHEDULE_NAMES: - print(f"WARNING: Arbitrary config schedule ({test_schedule}) is not supported.") - sys.exit(0) -else: - if use_base_schedule: - test_schedule = "base_schedule" - else: - test_schedule = "minimal_schedule" + make_recipe = "check-custom-schedule" -# copy base schedule to a temp file and append test_schedule_line -# to be able to run tests in parallel (if test_schedule_line is a parallel group.) -tmp_schedule_path = os.path.join( - regress_dir, f"tmp_schedule_{ random.randint(1, 10000)}" -) -# some tests don't need a schedule to run -# e.g tests that are in the first place in their own schedule -if test_file_name not in test_files_to_run_without_schedule: - shutil.copy2(os.path.join(regress_dir, test_schedule), tmp_schedule_path) -with open(tmp_schedule_path, "a") as myfile: - for _ in range(args["repeat"]): - myfile.write(test_schedule_line) + if args["valgrind"]: + make_recipe += "-vg" -# find suitable make recipe -if "isolation" in test_schedule: - make_recipe = "check-isolation-custom-schedule" -elif "failure" in test_schedule: - make_recipe = "check-failure-custom-schedule" -else: - make_recipe = "check-custom-schedule" + # prepare command to run tests + test_command = ( + f"make -C {regress_dir} {make_recipe} " + f"WORKERCOUNT={test_worker_count} " + f"SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" + ) -if args["valgrind"]: - make_recipe += "-vg" - -# prepare command to run tests -test_command = f"make -C {regress_dir} {make_recipe} SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" - -# run test command n times -try: - print(f"Executing.. {test_command}") - result = common.run(test_command) -finally: - # remove temp schedule file - os.remove(tmp_schedule_path) + # run test command n times + try: + print(f"Executing.. 
{test_command}") + result = common.run(test_command) + finally: + # remove temp schedule file + os.remove(tmp_schedule_path) diff --git a/src/test/regress/citus_tests/test/README.md b/src/test/regress/citus_tests/test/README.md new file mode 100644 index 000000000..6aac98e49 --- /dev/null +++ b/src/test/regress/citus_tests/test/README.md @@ -0,0 +1,171 @@ +# Pytest based tests + +## Usage + +Run all tests in parallel: + +```bash +pytest -n auto +``` + +Run all tests sequentially: +```bash +pytest +``` + +Run a specific test: +```bash +pytest test/test_columnar.py::test_recovery +``` + +Run a specific test file in parallel: +```bash +pytest -n auto test/test_columnar.py +``` + +Run any test that contains a certain string in the name: +```bash +pytest -k recovery +``` + +Run tests without it capturing stdout/stderr. This can be useful to see the +logs of a passing test: +```bash +pytest -s test/test_columnar.py::test_recovery +``` + +## General info + +Our other tests work by comparing output of a sequence of SQL commands that's +executed by `psql` to an expected output. If there's a difference between the +expected and actual output, then the tests fails. This works fine for many +cases, but certain types of tests are hard to write and a lot of care usually +has to be taken to make sure output is completely identical in every run. + +The tests in this directory use a different approach and use +[`pytest`][pytest-docs] to run tests that are written in the Python programming +language. This idea is similar to TAP tests that are part of Postgres, with the +important difference that those are written in Perl. + +In the sections below you can find most stuff you'll need to know about +`pytest` to run and write such tests, but if you want more detailed info some +useful references are: +- [A blog with pytest tips and tricks][pytest-tips] +- [The official pytest docs][pytest-docs] + +[pytest-docs]: https://docs.pytest.org/en/stable/ +[pytest-tips]: https://pythontest.com/pytest-tips-tricks/ + +## Adding a new test + +Tests are automatically discovered by `pytest` using a simple but effective +heuristic. In this directory (`src/test/regress/citus_tests/test`) it finds +all of the files that are named `test_{some name}.py`. Those files +are then searched for function names starting with the `test_` prefix. All those +functions are considered tests by `pytest`. + + +### Fixtures aka Dependency Injection aka Teardown/Cleanup + +An important part of tests is that they have some dependencies. The most +important dependency for us is usually a running Citus cluster. These +dependencies are provided by what `pytest` calls [fixtures]. Fixtures are +functions that `yield` a value. Anything before the `yield` is done during setup +and anything after the yield is done during teardown of the test (or whole +session). All our fixtures are defined in `conftest.py`. + + +Using a fixture in a test is very easy, but looks like a lot of magic. All you +have to do is make sure your test function has an argument with the same name as +the name of the fixture. 
For example:
+
+```python
+def test_some_query(cluster):
+    cluster.coordinator.sql("SELECT 1")
+```
+
+If you need a cluster of a specific size you can use the `cluster_factory`
+fixture:
+```python
+def test_with_100_workers(cluster_factory):
+    cluster = cluster_factory(100)
+```
+
+If you want more details on how fixtures work a few useful pages of the pytest
+docs are:
+- [About fixtures][fixtures]
+- [How to use fixtures][fixtures-how-to]
+- [Fixtures reference][fixtures-reference]
+
+[fixtures]: https://docs.pytest.org/en/stable/explanation/fixtures.html
+[fixtures-how-to]: https://docs.pytest.org/en/stable/how-to/fixtures.html
+[fixtures-reference]: https://docs.pytest.org/en/stable/reference/fixtures.html
+
+## Connecting to a test postgres
+
+Sometimes your test fails in an unexpected way and the easiest way to find
+out why is to connect to Postgres interactively at a certain point in the test.
+
+### Using `psql_debug`
+The easiest way is to use the `psql_debug()` method of your `Cluster` or
+`Postgres` instance.
+```python
+def test_something(cluster):
+    # working stuff
+
+    cluster.coordinator.psql_debug()
+
+    # unexpectedly failing test
+```
+
+Then run this test with stdout/stderr capturing disabled (`-s`) and it will show
+you an interactive `psql` prompt right at that point in the test:
+```bash
+$ pytest -s test/test_your_thing.py::test_something
+
+...
+
+psql (15.2)
+SSL connection (protocol: TLSv1.3, cipher: TLS_AES_256_GCM_SHA384, compression: off)
+Type "help" for help.
+
+127.0.0.1 postgres@postgres:10201-20016=
+> select 1;
+
+```
+
+
+### Debug manually
+
+Sometimes you need to connect to more than one node though. For that you can use
+the `debug` method of a `Cluster` instead.
+
+```python
+def test_something(cluster):
+    # working stuff
+
+    cluster.debug()
+
+    # unexpectedly failing test
+```
+
+
+Then run this test with stdout/stderr capturing disabled (`-s`) and it will show
+you the connection string for each of the nodes in the cluster:
+```bash
+$ PG_FORCE_PORTS=true pytest -s test/test_your_thing.py::test_something
+...
+
+The nodes in this cluster and their connection strings are:
+/tmp/pytest-of-jelte/pytest-752/cluster2-0/coordinator:
+  "host=127.0.0.1 port=10202 dbname=postgres user=postgres options='-c search_path=test_recovery' connect_timeout=3 client_encoding=UTF8"
+/tmp/pytest-of-jelte/pytest-752/cluster2-0/worker0:
+  "host=127.0.0.1 port=10203 dbname=postgres user=postgres options='-c search_path=test_recovery' connect_timeout=3 client_encoding=UTF8"
+/tmp/pytest-of-jelte/pytest-752/cluster2-0/worker1:
+  "host=127.0.0.1 port=10204 dbname=postgres user=postgres options='-c search_path=test_recovery' connect_timeout=3 client_encoding=UTF8"
+Press Enter to continue running the test...
+```
+
+Then in another terminal you can manually connect to as many of them as you want.
+Using `PG_FORCE_PORTS` is recommended here, to make sure that the ports will
+stay the same across runs of the tests. That way you can reuse the connection
+strings that you got from a previous run, if you need to debug again.
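+
+## Asserting on Postgres logs
+
+The `Postgres` handles used in these tests also provide a `log_contains`
+context manager, which checks that the Postgres log produced while the block
+runs matches a regex (and, when `times` is given, matches it that exact number
+of times). The snippet below is only a minimal sketch and not one of the
+existing tests; the test name and the `log_statement` setting are illustrative
+choices:
+
+```python
+def test_statement_is_logged(coord):
+    # `configure` uses ALTER SYSTEM SET, so the new setting only takes
+    # effect after a restart
+    coord.configure("log_statement = 'all'")
+    coord.restart()
+
+    # Without the `times` argument any number of matches is accepted
+    with coord.log_contains(r"statement: SELECT 1"):
+        coord.sql("SELECT 1")
+```
+
+The fixture teardown truncates `postgresql.auto.conf` and restarts the node,
+so a setting changed this way does not leak into other tests.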
diff --git a/src/test/regress/citus_tests/test/__init__.py b/src/test/regress/citus_tests/test/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/test/regress/citus_tests/test/conftest.py b/src/test/regress/citus_tests/test/conftest.py
new file mode 100644
index 000000000..467f253f6
--- /dev/null
+++ b/src/test/regress/citus_tests/test/conftest.py
@@ -0,0 +1,95 @@
+import pytest
+from common import CitusCluster, Postgres, cleanup_test_leftovers, parallel_run
+
+
+@pytest.fixture(scope="session")
+def cluster_factory_session(tmp_path_factory):
+    """The session level pytest fixture that creates and caches citus clusters
+
+    IMPORTANT: This should not be used directly, but only indirectly through
+    the cluster_factory fixture.
+    """
+    clusters = {}
+
+    def _make_or_get_cluster(worker_count: int):
+        if worker_count not in clusters:
+            clusters[worker_count] = CitusCluster(
+                tmp_path_factory.mktemp(f"cluster{worker_count}-"), worker_count
+            )
+        return clusters[worker_count]
+
+    yield _make_or_get_cluster
+
+    parallel_run(CitusCluster.cleanup, clusters.values())
+
+
+@pytest.fixture
+def cluster_factory(cluster_factory_session, request):
+    """The pytest fixture that creates and caches citus clusters
+
+    When the function provided by the factory is called, it returns a cluster
+    with the given worker count. This cluster is cached across tests, so that
+    future invocations with the same worker count don't need to create a
+    cluster again, but can reuse the previously created one.
+
+    To try and make sure that tests don't depend on each other this tries very
+    hard to clean up anything that is created during the test.
+
+    It also prints the Postgres logs that were produced during the test to
+    stdout. Normally these will be hidden, but when a test fails pytest will
+    show all stdout output produced during the test. Thus showing the Postgres
+    logs in that case makes it easier to debug.
+    """
+
+    log_handles = []
+    clusters = []
+    nodes = []
+
+    def _make_or_get_cluster(worker_count: int):
+        nonlocal log_handles
+        nonlocal nodes
+        cluster = cluster_factory_session(worker_count)
+        if cluster.failed_reset:
+            cluster.reset()
+            cluster.failed_reset = False
+        clusters.append(cluster)
+        log_handles += [(node, node.log_handle()) for node in cluster.nodes]
+        nodes += cluster.nodes
+
+        # Create a dedicated schema for the test and use it by default
+        cluster.coordinator.create_schema(request.node.originalname)
+        cluster.schema = request.node.originalname
+
+        return cluster
+
+    yield _make_or_get_cluster
+
+    try:
+        # We clean up test leftovers on all nodes together, instead of per
+        # cluster. The reason for this is that some subscription/publication
+        # pairs might be between different clusters. And by cleaning them up
+        # all together, the ordering of the DROPs is easy to make correct.
+        cleanup_test_leftovers(nodes)
+        parallel_run(Postgres.prepare_reset, nodes)
+        parallel_run(Postgres.restart, nodes)
+    except Exception:
+        for cluster in clusters:
+            cluster.failed_reset = True
+        raise
+    finally:
+        for node, log in log_handles:
+            print(f"\n\nPG_LOG: {node.pgdata}\n")
+            print(log.read())
+            log.close()
+
+
+@pytest.fixture(name="coord")
+def coordinator(cluster_factory):
+    """Sets up a clean single-node Citus cluster for this test"""
+    yield cluster_factory(0).coordinator
+
+
+@pytest.fixture
+def cluster(cluster_factory):
+    """Sets up a clean 2-worker Citus cluster for this test"""
+    yield cluster_factory(2)
diff --git a/src/test/regress/citus_tests/test/test_columnar.py b/src/test/regress/citus_tests/test/test_columnar.py
new file mode 100644
index 000000000..7366cd432
--- /dev/null
+++ b/src/test/regress/citus_tests/test/test_columnar.py
@@ -0,0 +1,117 @@
+import psycopg
+import pytest
+
+
+def test_freezing(coord):
+    coord.configure("vacuum_freeze_min_age = 50000", "vacuum_freeze_table_age = 50000")
+    coord.restart()
+
+    # create a regular table to burn through transaction ids, and a columnar
+    # table to check that VACUUM FREEZE actually freezes it
+    coord.sql("CREATE TABLE test_row(i int)")
+    coord.sql("INSERT INTO test_row VALUES (1) ")
+    coord.sql(
+        "CREATE TABLE test_columnar_freeze(i int) USING columnar WITH(autovacuum_enabled=false)"
+    )
+    coord.sql("INSERT INTO test_columnar_freeze VALUES (1)")
+
+    for _ in range(0, 7):
+        with coord.cur() as cur:
+            for _ in range(0, 10_000):
+                cur.execute("UPDATE test_row SET i = i + 1")
+
+    frozen_age = coord.sql_value(
+        """
+        select age(relfrozenxid)
+        from pg_class where relname='test_columnar_freeze';
+        """
+    )
+
+    assert frozen_age > 70_000, "columnar table was frozen"
+    coord.sql("VACUUM FREEZE test_columnar_freeze")
+
+    frozen_age = coord.sql_value(
+        """
+        select age(relfrozenxid)
+        from pg_class where relname='test_columnar_freeze';
+        """
+    )
+    assert frozen_age < 70_000, "columnar table was not frozen"
+
+
+def test_recovery(coord):
+    # create columnar table and insert simple data to verify the data survives a crash
+    coord.sql("CREATE TABLE t1 (a int, b text) USING columnar")
+    coord.sql(
+        "INSERT INTO t1 SELECT a, 'hello world ' || a FROM generate_series(1,1002) AS a"
+    )
+
+    # simulate crash
+    coord.stop("immediate")
+    coord.start()
+
+    row_count = coord.sql_value("SELECT count(*) FROM t1")
+    assert row_count == 1002, "columnar didn't recover data before crash correctly"
+
+    # truncate the table to verify the truncation survives a crash
+    coord.sql("TRUNCATE t1")
+    # simulate crash
+    coord.stop("immediate")
+    coord.start()
+
+    row_count = coord.sql_value("SELECT count(*) FROM t1")
+    assert row_count == 0, "columnar didn't recover the truncate correctly"
+
+    # test crashing while having an open transaction
+    with pytest.raises(
+        psycopg.OperationalError,
+        match="server closed the connection unexpectedly|consuming input failed: EOF detected",
+    ):
+        with coord.transaction() as cur:
+            cur.execute(
+                "INSERT INTO t1 SELECT a, 'hello world ' || a FROM generate_series(1,1003) AS a"
+            )
+            # simulate crash
+            coord.stop("immediate")
+
+    coord.start()
+
+    row_count = coord.sql_value("SELECT count(*) FROM t1")
+    assert row_count == 0, "columnar didn't recover uncommitted transaction"
+
+    # test crashing while having a prepared transaction
+    with pytest.raises(
+        psycopg.OperationalError,
+        match="server closed the connection unexpectedly|consuming input failed: EOF detected",
+    ):
+        with coord.transaction() as cur:
+            cur.execute(
+                "INSERT INTO t1 SELECT a, 'hello 
world ' || a FROM generate_series(1,1004) AS a" + ) + cur.execute("PREPARE TRANSACTION 'prepared_xact_crash'") + # simulate crash + coord.stop("immediate") + + coord.start() + + row_count = coord.sql_value("SELECT count(*) FROM t1") + assert row_count == 0, "columnar didn't recover uncommitted prepared transaction" + + coord.sql("COMMIT PREPARED 'prepared_xact_crash'") + + row_count = coord.sql_value("SELECT count(*) FROM t1") + assert row_count == 1004, "columnar didn't recover committed transaction" + + # test crash recovery with copied data + with coord.cur() as cur: + with cur.copy("COPY t1 FROM STDIN") as copy: + copy.write_row((1, "a")) + copy.write_row((2, "b")) + copy.write_row((3, "c")) + + # simulate crash + coord.stop("immediate") + coord.start() + + row_count = coord.sql_value("SELECT count(*) FROM t1") + assert row_count == 1007, "columnar didn't recover after copy" diff --git a/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py b/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py index d36754d48..3ea51d5d9 100755 --- a/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py +++ b/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py @@ -19,15 +19,17 @@ import re import subprocess import sys +from docopt import docopt + # https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time/14132912#14132912 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import common -import utils -from docopt import docopt -from utils import USER +# ignore E402 because these imports require addition to path +import common # noqa: E402 +import utils # noqa: E402 +from utils import USER # noqa: E402 -from config import ( +from config import ( # noqa: E402 AFTER_CITUS_UPGRADE_COORD_SCHEDULE, BEFORE_CITUS_UPGRADE_COORD_SCHEDULE, CITUS_VERSION_SQL, diff --git a/src/test/regress/citus_tests/upgrade/pg_upgrade_test.py b/src/test/regress/citus_tests/upgrade/pg_upgrade_test.py index dd7c990af..f4ee4301c 100755 --- a/src/test/regress/citus_tests/upgrade/pg_upgrade_test.py +++ b/src/test/regress/citus_tests/upgrade/pg_upgrade_test.py @@ -10,21 +10,22 @@ Options: --pgxsdir= Path to the PGXS directory(ex: ~/.pgenv/src/postgresql-11.3) """ +import atexit import os +import subprocess import sys +from docopt import docopt + # https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time/14132912#14132912 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import atexit -import subprocess +# ignore E402 because these imports require addition to path +import common # noqa: E402 +import utils # noqa: E402 +from utils import USER # noqa: E402 -import common -import utils -from docopt import docopt -from utils import USER - -from config import ( +from config import ( # noqa: E402 AFTER_PG_UPGRADE_SCHEDULE, BEFORE_PG_UPGRADE_SCHEDULE, PGUpgradeConfig, diff --git a/src/test/regress/create_schedule b/src/test/regress/create_schedule index 82dfa2475..e301678b9 100644 --- a/src/test/regress/create_schedule +++ b/src/test/regress/create_schedule @@ -13,3 +13,5 @@ test: arbitrary_configs_truncate_create test: arbitrary_configs_truncate_cascade_create test: arbitrary_configs_truncate_partition_create test: arbitrary_configs_alter_table_add_constraint_without_name_create +test: merge_arbitrary_create +test: arbitrary_configs_router_create diff --git a/src/test/regress/enterprise_schedule b/src/test/regress/enterprise_schedule index 84341d23d..55791d43a 100644 --- a/src/test/regress/enterprise_schedule 
+++ b/src/test/regress/enterprise_schedule @@ -19,6 +19,7 @@ test: citus_local_tables_ent test: remove_coordinator # -------- +test: publication test: logical_replication test: multi_create_table test: multi_create_table_superuser diff --git a/src/test/regress/expected/aggregate_support.out b/src/test/regress/expected/aggregate_support.out index ba9d9e2f3..57bbbbe78 100644 --- a/src/test/regress/expected/aggregate_support.out +++ b/src/test/regress/expected/aggregate_support.out @@ -665,6 +665,8 @@ select array_collect_sort(val) from aggdata; (1 row) reset role; +drop owned by notsuper; +drop user notsuper; -- Test aggregation on coordinator set citus.coordinator_aggregation_strategy to 'row-gather'; select key, first(val order by id), last(val order by id) @@ -1233,11 +1235,40 @@ CREATE AGGREGATE newavg ( initcond1 = '{0,0}' ); SELECT run_command_on_workers($$select aggfnoid from pg_aggregate where aggfnoid::text like '%newavg%';$$); - run_command_on_workers + run_command_on_workers --------------------------------------------------------------------- (localhost,57637,t,aggregate_support.newavg) (localhost,57638,t,aggregate_support.newavg) (2 rows) +CREATE TYPE coord AS (x int, y int); +CREATE FUNCTION coord_minx_sfunc(state coord, new coord) +returns coord immutable language plpgsql as $$ +BEGIN + IF (state IS NULL OR new.x < state.x) THEN + RETURN new; + ELSE + RETURN state; + END IF; +END +$$; +create function coord_minx_finalfunc(state coord) +returns coord immutable language plpgsql as $$ +begin return state; +end; +$$; +-- custom aggregate that has the same name as a built-in function, but with a combinefunc +create aggregate min (coord) ( + sfunc = coord_minx_sfunc, + stype = coord, + finalfunc = coord_minx_finalfunc, + combinefunc = coord_minx_sfunc +); +select min((id,val)::coord) from aggdata; + min +--------------------------------------------------------------------- + (1,2) +(1 row) + set client_min_messages to error; drop schema aggregate_support cascade; diff --git a/src/test/regress/expected/arbitrary_configs_router.out b/src/test/regress/expected/arbitrary_configs_router.out new file mode 100644 index 000000000..a42b955cc --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_router.out @@ -0,0 +1,1561 @@ +SET search_path TO arbitrary_configs_router; +SET client_min_messages TO WARNING; +-- test simple select for a single row +SELECT * FROM articles_hash WHERE author_id = 10 AND id = 50; + id | author_id | title | word_count +--------------------------------------------------------------------- + 50 | 10 | anjanette | 19519 +(1 row) + +-- get all titles by a single author +SELECT title FROM articles_hash WHERE author_id = 10; + title +--------------------------------------------------------------------- + aggrandize + absentness + andelee + attemper + anjanette +(5 rows) + +-- try ordering them by word count +SELECT title, word_count FROM articles_hash + WHERE author_id = 10 + ORDER BY word_count DESC NULLS LAST; + title | word_count +--------------------------------------------------------------------- + anjanette | 19519 + aggrandize | 17277 + attemper | 14976 + andelee | 6363 + absentness | 1820 +(5 rows) + +-- look at last two articles by an author +SELECT title, id FROM articles_hash + WHERE author_id = 5 + ORDER BY id + LIMIT 2; + title | id +--------------------------------------------------------------------- + aruru | 5 + adversa | 15 +(2 rows) + +-- find all articles by two authors in same shard +-- but plan is not fast path router plannable due to +-- 
two distribution columns in the query +SELECT title, author_id FROM articles_hash + WHERE author_id = 7 OR author_id = 8 + ORDER BY author_id ASC, id; + title | author_id +--------------------------------------------------------------------- + aseptic | 7 + auriga | 7 + arsenous | 7 + archduchies | 7 + abeyance | 7 + agatized | 8 + assembly | 8 + aerophyte | 8 + anatine | 8 + alkylic | 8 +(10 rows) + +-- having clause is supported if it goes to a single shard +-- and single dist. key on the query +SELECT author_id, sum(word_count) AS corpus_size FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id + HAVING sum(word_count) > 1000 + ORDER BY sum(word_count) DESC; + author_id | corpus_size +--------------------------------------------------------------------- + 1 | 35894 +(1 row) + +-- fast path planner only support = operator +SELECT * FROM articles_hash WHERE author_id <= 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +SELECT * FROM articles_hash WHERE author_id IN (1, 3) ORDER BY 1,2,3,4; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 43 | 3 | affixal | 12723 +(10 rows) + +-- queries with CTEs cannot go through fast-path planning +WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) +SELECT * FROM first_author; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- two CTE joins also cannot go through fast-path planning +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + id | author_id | id | title +--------------------------------------------------------------------- + 1 | 1 | 1 | arsenous + 11 | 1 | 11 | alamo + 21 | 1 | 21 | arcading + 31 | 1 | 31 | athwartships + 41 | 1 | 41 | aznavour +(5 rows) + +-- this is a different case where each CTE is recursively planned and those goes +-- through the fast-path router planner, but the top level join is not +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + id | author_id | id | title +--------------------------------------------------------------------- +(0 rows) + +-- recursive CTEs are also cannot go through fast +-- path planning +WITH RECURSIVE hierarchy as ( + SELECT *, 1 AS level + FROM company_employees + WHERE company_id = 1 and manager_id = 0 + UNION + SELECT ce.*, (h.level+1) + FROM hierarchy h JOIN company_employees ce + ON (h.employee_id = ce.manager_id AND + h.company_id = ce.company_id AND + ce.company_id = 1)) +SELECT * FROM hierarchy WHERE LEVEL <= 2; + company_id | employee_id | manager_id | level +--------------------------------------------------------------------- + 1 | 1 | 0 | 1 + 1 | 2 | 1 | 2 + 1 | 3 | 1 | 2 +(3 rows) + +WITH update_article AS ( + UPDATE articles_hash SET 
word_count = 10 WHERE id = 1 AND word_count = 9 RETURNING * +) +SELECT * FROM update_article; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +WITH delete_article AS ( + DELETE FROM articles_hash WHERE id = 1 AND word_count = 10 RETURNING * +) +SELECT * FROM delete_article; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- grouping sets are supported via fast-path +SELECT + id, substring(title, 2, 1) AS subtitle, count(*) + FROM articles_hash + WHERE author_id = 1 + GROUP BY GROUPING SETS ((id),(subtitle)) + ORDER BY id, subtitle; + id | subtitle | count +--------------------------------------------------------------------- + 1 | | 1 + 11 | | 1 + 21 | | 1 + 31 | | 1 + 41 | | 1 + | l | 1 + | r | 2 + | t | 1 + | z | 1 +(9 rows) + +-- queries which involve functions in FROM clause are not supported via fast path planning +SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1; + id | author_id | title | word_count | position +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 | 3 + 11 | 1 | alamo | 1347 | 3 + 21 | 1 | arcading | 5890 | 3 + 31 | 1 | athwartships | 7271 | 3 + 41 | 1 | aznavour | 11814 | 3 +(5 rows) + +-- sublinks are not supported via fast path planning +SELECT * FROM articles_hash +WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2) +ORDER BY articles_hash.id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 2 | 2 | abducing | 13642 + 12 | 2 | archiblast | 18185 + 22 | 2 | antipope | 2728 + 32 | 2 | amazon | 11342 + 42 | 2 | ausable | 15885 +(5 rows) + +-- subqueries are not supported via fast path planning +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; + id | word_count +--------------------------------------------------------------------- + 50 | 19519 + 14 | 19094 + 48 | 18610 + 12 | 18185 + 46 | 17702 +(5 rows) + +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test +WHERE test.id = articles_hash.id and articles_hash.author_id = 1 +ORDER BY articles_hash.id; + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +-- simple lookup query just works +SELECT * + FROM articles_hash + WHERE author_id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- below query hits a single shard but with multiple filters +-- so cannot go via fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 OR author_id = 17; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- rename the output columns +SELECT id as article_id, word_count * id as random_value + FROM articles_hash + WHERE author_id = 1; + article_id | random_value 
+--------------------------------------------------------------------- + 1 | 9572 + 11 | 14817 + 21 | 123690 + 31 | 225401 + 41 | 484374 +(5 rows) + +-- joins do not go through fast-path planning +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + LIMIT 3; + first_author | second_word_count +--------------------------------------------------------------------- + 10 | 17277 + 10 | 1820 + 10 | 6363 +(3 rows) + +-- single shard select with limit goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 3; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 +(3 rows) + +-- single shard select with limit + offset goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 2 + OFFSET 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 +(2 rows) + +-- single shard select with limit + offset + order by goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id desc + LIMIT 2 + OFFSET 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 31 | 1 | athwartships | 7271 + 21 | 1 | arcading | 5890 +(2 rows) + +-- single shard select with group by on non-partition column goes through fast-path planning +SELECT id + FROM articles_hash + WHERE author_id = 1 + GROUP BY id + ORDER BY id; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- single shard select with distinct goes through fast-path planning +SELECT DISTINCT id + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +-- single shard aggregate goes through fast-path planning +SELECT avg(word_count) + FROM articles_hash + WHERE author_id = 2; + avg +--------------------------------------------------------------------- + 12356.400000000000 +(1 row) + +-- max, min, sum, count goes through fast-path planning +SELECT max(word_count) as max, min(word_count) as min, + sum(word_count) as sum, count(word_count) as cnt + FROM articles_hash + WHERE author_id = 2; + max | min | sum | cnt +--------------------------------------------------------------------- + 18185 | 2728 | 61782 | 5 +(1 row) + +-- queries with aggregates and group by goes through fast-path planning +SELECT max(word_count) + FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id; + max +--------------------------------------------------------------------- + 11814 +(1 row) + +-- set operations are not supported via fast-path planning +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 3 +) AS combination +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 43 | 3 | affixal | 12723 +(10 rows) + +-- function calls 
in the target list is supported via fast path +SELECT LEFT(title, 1) FROM articles_hash WHERE author_id = 1; + left +--------------------------------------------------------------------- + a + a + a + a + a +(5 rows) + +-- top-level union queries are supported through recursive planning +-- unions in subqueries are not supported via fast-path planning +SELECT * FROM ( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 1)) uu +ORDER BY 1, 2 +LIMIT 5; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Test various filtering options for router plannable check +-- cannot go through fast-path if there is +-- explicit coercion +SELECT * + FROM articles_hash + WHERE author_id = 1::bigint; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- can go through fast-path if there is +-- implicit coercion +-- This doesn't work see the related issue +-- reported https://github.com/citusdata/citus/issues/2605 +-- SELECT * +-- FROM articles_hash +-- WHERE author_id = 1.0; +SELECT * + FROM articles_hash + WHERE author_id = 68719476736; -- this is bigint + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 and author_id >= 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 or id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- goes through fast-path planning because +-- the dist. key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = 1 or id = 41); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 41 | 1 | aznavour | 11814 +(2 rows) + +-- this time there is an OR clause which prevents +-- router planning at all +SELECT * + FROM articles_hash + WHERE author_id = 1 and id = 1 or id = 41; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 41 | 1 | aznavour | 11814 +(2 rows) + +-- goes through fast-path planning because +-- the dist. 
key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = random()::int * 0); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- not router plannable due to function call on the right side +SELECT * + FROM articles_hash + WHERE author_id = (random()::int * 0 + 1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(-1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE 1 = abs(author_id); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(author_id - 2); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- the function is not on the dist. 
key, so qualify as +-- fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = abs(id - 2)); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 +(1 row) + +-- not router plannable due to is true +SELECT * + FROM articles_hash + WHERE (author_id = 1) is true; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- router plannable, (boolean expression) = true is collapsed to (boolean expression) +SELECT * + FROM articles_hash + WHERE (author_id = 1) = true; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- some more complex quals +SELECT count(*) FROM articles_hash WHERE (author_id = 15) AND (id = 1 OR word_count > 5); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (author_id = 15) OR (id = 1 AND word_count > 5); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) OR (author_id = 1 AND word_count > 5); + count +--------------------------------------------------------------------- + 6 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 OR word_count > 5); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 AND (word_count > 5 OR id = 2)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 OR author_id = 2)); + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 AND author_id = 2)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND ((word_count > 5 OR title ilike 'b%' ) AND (author_id = 2 AND word_count > 50))); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- fast-path router plannable, between operator is on another column +SELECT * + FROM articles_hash + WHERE (author_id = 1) and id between 0 and 20; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 +(2 rows) + +-- fast-path router plannable, partition column expression is and'ed to rest +SELECT * + FROM articles_hash + WHERE (author_id = 1) and (id = 1 or id = 31) and title like '%s'; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 31 | 1 | athwartships | 7271 +(2 rows) + +-- fast-path router plannable, order is changed +SELECT * + FROM articles_hash + WHERE (id = 1 or id = 31) and title like '%s' and (author_id = 1); + id | author_id | title | word_count 
+--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 31 | 1 | athwartships | 7271 +(2 rows) + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1) and (word_count < 3000 or word_count > 8000); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 41 | 1 | aznavour | 11814 +(3 rows) + +-- window functions are supported with fast-path router plannable +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5; + prev | title | word_count +--------------------------------------------------------------------- + | afrasia | 864 + afrasia | adversa | 3164 + adversa | antehall | 7707 + antehall | aminate | 9089 + aminate | aruru | 11389 +(5 rows) + +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5 + ORDER BY word_count DESC; + prev | title | word_count +--------------------------------------------------------------------- + aminate | aruru | 11389 + antehall | aminate | 9089 + adversa | antehall | 7707 + afrasia | adversa | 3164 + | afrasia | 864 +(5 rows) + +SELECT id, MIN(id) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + id | min +--------------------------------------------------------------------- + 11 | 11 + 21 | 11 + 31 | 11 + 1 | 1 + 41 | 1 +(5 rows) + +SELECT id, word_count, AVG(word_count) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + id | word_count | avg +--------------------------------------------------------------------- + 11 | 1347 | 1347.0000000000000000 + 21 | 5890 | 3618.5000000000000000 + 31 | 7271 | 4836.0000000000000000 + 1 | 9572 | 6020.0000000000000000 + 41 | 11814 | 7178.8000000000000000 +(5 rows) + +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1; + word_count | rank +--------------------------------------------------------------------- + 1347 | 1 + 5890 | 2 + 7271 | 3 + 9572 | 4 + 11814 | 5 +(5 rows) + +-- some more tests on complex target lists +SELECT DISTINCT ON (author_id, id) author_id, id, + MIN(id) over (order by avg(word_count)) * AVG(id * 5.2 + (1.0/max(word_count))) over (order by max(word_count)) as t1, + count(*) FILTER (WHERE title LIKE 'al%') as cnt_with_filter, + count(*) FILTER (WHERE '0300030' LIKE '%3%') as cnt_with_filter_2, + avg(case when id > 2 then char_length(word_count::text) * (id * strpos(word_count::text, '1')) end) as case_cnt, + COALESCE(strpos(avg(word_count)::text, '1'), 20) + FROM articles_hash as aliased_table + WHERE author_id = 1 + GROUP BY author_id, id + HAVING count(DISTINCT title) > 0 + ORDER BY author_id, id, sum(word_count) - avg(char_length(title)) DESC, COALESCE(array_upper(ARRAY[max(id)],1) * 5,0) DESC; + author_id | id | t1 | cnt_with_filter | cnt_with_filter_2 | case_cnt | coalesce +--------------------------------------------------------------------- + 1 | 1 | 
83.20028854345579490574 | 0 | 1 | | 0 + 1 | 11 | 629.20816629547141796586 | 1 | 1 | 44.0000000000000000 | 1 + 1 | 21 | 915.20501693381380745499 | 0 | 1 | 0.00000000000000000000 | 0 + 1 | 31 | 1201.20384890897723321000 | 0 | 1 | 496.0000000000000000 | 4 + 1 | 41 | 109.200247763831844321405335 | 0 | 1 | 205.0000000000000000 | 1 +(5 rows) + +-- where false queries are router plannable but not fast-path +SELECT * + FROM articles_hash + WHERE false; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and false; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and 1=0; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +SELECT * + FROM articles_hash + WHERE null and author_id = 1; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- we cannot qualify dist_key = X operator Y via +-- fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + 1; + id | author_id | title | word_count +--------------------------------------------------------------------- + 2 | 2 | abducing | 13642 + 12 | 2 | archiblast | 18185 + 22 | 2 | antipope | 2728 + 32 | 2 | amazon | 11342 + 42 | 2 | ausable | 15885 +(5 rows) + +-- where false with immutable function returning false +-- goes through fast-path +SELECT * + FROM articles_hash a + WHERE a.author_id = 10 and int4eq(1, 2); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- partition_column is null clause does not prune out any shards, +-- all shards remain after shard pruning, not router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id is null; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- partition_column equals to null clause prunes out all shards +-- no shards after shard pruning, router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id = null; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- union/difference /intersection with where false +-- this query was not originally router plannable, addition of 1=0 +-- makes it router plannable but not fast-path +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- same with the above, but with WHERE false +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination WHERE false +ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +-- window functions with where false +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY 
word_count) + FROM articles_hash + WHERE author_id = 1 and 1=0; + word_count | rank +--------------------------------------------------------------------- +(0 rows) + +-- complex query hitting a single shard and a fast-path +SELECT + count(DISTINCT CASE + WHEN + word_count > 100 + THEN + id + ELSE + NULL + END) as c + FROM + articles_hash + WHERE + author_id = 5; + c +--------------------------------------------------------------------- + 5 +(1 row) + +-- queries inside transactions can be fast-path router plannable +BEGIN; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +END; +-- queries inside read-only transactions can be fast-path router plannable +SET TRANSACTION READ ONLY; +WARNING: SET TRANSACTION can only be used in transaction blocks +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +END; +WARNING: there is no transaction in progress +-- cursor queries are fast-path router plannable +BEGIN; +DECLARE test_cursor CURSOR FOR + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +FETCH test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 +(1 row) + +FETCH ALL test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(4 rows) + +FETCH test_cursor; -- fetch one row after the last + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +FETCH BACKWARD test_cursor; + id | author_id | title | word_count +--------------------------------------------------------------------- + 41 | 1 | aznavour | 11814 +(1 row) + +END; +-- queries inside copy can be router plannable +COPY ( + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id) TO STDOUT; +1 1 arsenous 9572 +11 1 alamo 1347 +21 1 arcading 5890 +31 1 athwartships 7271 +41 1 aznavour 11814 +-- table creation queries inside can be fast-path router plannable +CREATE TEMP TABLE temp_articles_hash as + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +-- fast-path router plannable queries may include filter for aggregates +SELECT count(*), count(*) FILTER (WHERE id < 3) + FROM articles_hash + WHERE author_id = 1; + count | count +--------------------------------------------------------------------- + 5 | 1 +(1 row) + +-- prepare queries can be router plannable +PREPARE author_1_articles as + SELECT * + FROM articles_hash + WHERE author_id = 1; +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count 
+--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_1_articles; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- parametric prepare queries can be router plannable +PREPARE author_articles(int) as + SELECT * + FROM articles_hash + WHERE author_id = $1; +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(1); + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE 
author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE author_articles(NULL); + id | author_id | title | word_count +--------------------------------------------------------------------- +(0 rows) + +PREPARE author_articles_update(int) AS + UPDATE articles_hash SET title = 'test' WHERE author_id = $1; +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +-- we don't want too many details. though we're omitting +-- "DETAIL: distribution column value:", we see it acceptable +-- since the query results verifies the correctness +\set VERBOSITY terse +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT author_articles_max_id(1); + author_articles_max_id +--------------------------------------------------------------------- + 41 +(1 row) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + 
id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +SELECT * FROM author_articles_id_word_count(1); + id | word_count +--------------------------------------------------------------------- + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + +\set VERBOSITY default +-- insert .. 
select via coordinator could also +-- use fast-path queries +PREPARE insert_sel(int, int) AS +INSERT INTO articles_hash + SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0; +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +-- one final interesting preperad statement +-- where one of the filters is on the target list +PREPARE fast_path_agg_filter(int, int) AS + SELECT + count(*) FILTER (WHERE word_count=$1) + FROM + articles_hash + WHERE author_id = $2; +EXECUTE fast_path_agg_filter(1,1); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(2,2); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(3,3); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(4,4); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(5,5); + count +--------------------------------------------------------------------- + 0 +(1 row) + +EXECUTE fast_path_agg_filter(6,6); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- views internally become subqueries, so not fast-path router query +SELECT * FROM test_view; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +-- materialized views can be created for fast-path router plannable queries +CREATE MATERIALIZED VIEW mv_articles_hash_empty AS + SELECT * FROM articles_hash WHERE author_id = 1; +SELECT * FROM mv_articles_hash_empty; + id | author_id | title | word_count +--------------------------------------------------------------------- + 1 | 1 | arsenous | 9572 + 11 | 1 | alamo | 1347 + 21 | 1 | arcading | 5890 + 31 | 1 | athwartships | 7271 + 41 | 1 | aznavour | 11814 +(5 rows) + +SELECT id + FROM articles_hash + WHERE author_id = 1; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 +(5 rows) + +INSERT INTO articles_hash VALUES (51, 1, 'amateus', 1814), (52, 1, 'second amateus', 2824); +-- verify insert is successfull (not router plannable and executable) +SELECT id + FROM articles_hash + WHERE author_id = 1; + id +--------------------------------------------------------------------- + 1 + 11 + 21 + 31 + 41 + 51 + 52 +(7 rows) + +SELECT count(*) FROM collections_list WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FROM collections_list_1 WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FROM collections_list_2 WHERE key = 4; + count +--------------------------------------------------------------------- + 0 +(1 row) + +UPDATE collections_list SET value = 15 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT count(*) FILTER (where value = 15) FROM collections_list_1 WHERE key = 4; + count +--------------------------------------------------------------------- + 5 +(1 
row) + +SELECT count(*) FILTER (where value = 15) FROM collections_list_2 WHERE key = 4; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- test INSERT using values from generate_series() and repeat() functions +INSERT INTO authors_reference (id, name) VALUES (generate_series(1, 10), repeat('Migjeni', 3)); +SELECT * FROM authors_reference ORDER BY 1, 2; + id | name +--------------------------------------------------------------------- + 1 | MigjeniMigjeniMigjeni + 2 | MigjeniMigjeniMigjeni + 3 | MigjeniMigjeniMigjeni + 4 | MigjeniMigjeniMigjeni + 5 | MigjeniMigjeniMigjeni + 6 | MigjeniMigjeniMigjeni + 7 | MigjeniMigjeniMigjeni + 8 | MigjeniMigjeniMigjeni + 9 | MigjeniMigjeniMigjeni + 10 | MigjeniMigjeniMigjeni +(10 rows) + diff --git a/src/test/regress/expected/arbitrary_configs_router_create.out b/src/test/regress/expected/arbitrary_configs_router_create.out new file mode 100644 index 000000000..74dfbf4f3 --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_router_create.out @@ -0,0 +1,121 @@ +CREATE SCHEMA arbitrary_configs_router; +SET search_path TO arbitrary_configs_router; +CREATE TABLE articles_hash ( + id bigint NOT NULL, + author_id bigint NOT NULL, + title varchar(20) NOT NULL, + word_count integer +); +SELECT create_distributed_table('articles_hash', 'author_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE authors_reference (id int, name text); +SELECT create_reference_table('authors_reference'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- create a bunch of test data +INSERT INTO articles_hash VALUES (1, 1, 'arsenous', 9572), (2, 2, 'abducing', 13642),( 3, 3, 'asternal', 10480),( 4, 4, 'altdorfer', 14551),( 5, 5, 'aruru', 11389), + (6, 6, 'atlases', 15459),(7, 7, 'aseptic', 12298),( 8, 8, 'agatized', 16368),(9, 9, 'alligate', 438), + (10, 10, 'aggrandize', 17277),(11, 1, 'alamo', 1347),(12, 2, 'archiblast', 18185), + (13, 3, 'aseyev', 2255),(14, 4, 'andesite', 19094),(15, 5, 'adversa', 3164), + (16, 6, 'allonym', 2),(17, 7, 'auriga', 4073),(18, 8, 'assembly', 911),(19, 9, 'aubergiste', 4981), + (20, 10, 'absentness', 1820),(21, 1, 'arcading', 5890),(22, 2, 'antipope', 2728),(23, 3, 'abhorring', 6799), + (24, 4, 'audacious', 3637),(25, 5, 'antehall', 7707),(26, 6, 'abington', 4545),(27, 7, 'arsenous', 8616), + (28, 8, 'aerophyte', 5454),(29, 9, 'amateur', 9524),(30, 10, 'andelee', 6363),(31, 1, 'athwartships', 7271), + (32, 2, 'amazon', 11342),(33, 3, 'autochrome', 8180),(34, 4, 'amnestied', 12250),(35, 5, 'aminate', 9089), + (36, 6, 'ablation', 13159),(37, 7, 'archduchies', 9997),(38, 8, 'anatine', 14067),(39, 9, 'anchises', 10906), + (40, 10, 'attemper', 14976),(41, 1, 'aznavour', 11814),(42, 2, 'ausable', 15885),(43, 3, 'affixal', 12723), + (44, 4, 'anteport', 16793),(45, 5, 'afrasia', 864),(46, 6, 'atlanta', 17702),(47, 7, 'abeyance', 1772), + (48, 8, 'alkylic', 18610),(49, 9, 'anyone', 2681),(50, 10, 'anjanette', 19519); +CREATE TABLE company_employees (company_id int, employee_id int, manager_id int); +SELECT create_distributed_table('company_employees', 'company_id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO company_employees values(1, 1, 0); +INSERT INTO company_employees values(1, 2, 1); +INSERT INTO company_employees values(1, 3, 1); +INSERT INTO company_employees values(1, 
4, 2); +INSERT INTO company_employees values(1, 5, 4); +INSERT INTO company_employees values(3, 1, 0); +INSERT INTO company_employees values(3, 15, 1); +INSERT INTO company_employees values(3, 3, 1); +-- finally, some tests with partitioned tables +CREATE TABLE collections_list ( + key bigint, + ts timestamptz, + collection_id integer, + value numeric +) PARTITION BY LIST (collection_id ); +CREATE TABLE collections_list_1 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 1 ); +CREATE TABLE collections_list_2 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 2 ); +SELECT create_distributed_table('collections_list', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO collections_list SELECT i % 10, now(), (i % 2) + 1, i*i FROM generate_series(0, 50)i; +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id() RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = 1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id(int) RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = $1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; +-- check that function returning setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count() RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = 1; + +END; +$$ LANGUAGE plpgsql; +-- check that function returning setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count(int) RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = $1; + +END; +$$ LANGUAGE plpgsql; +-- Suppress the warning that tells that the view won't be distributed +-- because it depends on a local table. +-- +-- This only happens when running PostgresConfig. +SET client_min_messages TO ERROR; +CREATE VIEW test_view AS + SELECT * FROM articles_hash WHERE author_id = 1; diff --git a/src/test/regress/expected/background_rebalance.out b/src/test/regress/expected/background_rebalance.out index c82078d6f..e4495ccf9 100644 --- a/src/test/regress/expected/background_rebalance.out +++ b/src/test/regress/expected/background_rebalance.out @@ -291,6 +291,12 @@ SELECT state, details from citus_rebalance_status(); finished | {"tasks": [], "task_state_counts": {"done": 2}} (1 row) +SELECT public.wait_for_resource_cleanup(); + wait_for_resource_cleanup +--------------------------------------------------------------------- + +(1 row) + -- Remove coordinator again to allow rerunning of this test SELECT 1 FROM citus_remove_node('localhost', :master_port); ?column? diff --git a/src/test/regress/expected/background_rebalance_parallel.out b/src/test/regress/expected/background_rebalance_parallel.out new file mode 100644 index 000000000..862beb57e --- /dev/null +++ b/src/test/regress/expected/background_rebalance_parallel.out @@ -0,0 +1,364 @@ +/* + Test to check if the background tasks scheduled by the background rebalancer + has the correct dependencies. 
+*/ +CREATE SCHEMA background_rebalance_parallel; +SET search_path TO background_rebalance_parallel; +SET citus.next_shard_id TO 85674000; +SET citus.shard_replication_factor TO 1; +SET client_min_messages TO WARNING; +ALTER SEQUENCE pg_dist_background_job_job_id_seq RESTART 17777; +ALTER SEQUENCE pg_dist_background_task_task_id_seq RESTART 1000; +ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 50050; +SELECT nextval('pg_catalog.pg_dist_groupid_seq') AS last_group_id_cls \gset +SELECT nextval('pg_catalog.pg_dist_node_nodeid_seq') AS last_node_id_cls \gset +ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART 50; +ALTER SEQUENCE pg_catalog.pg_dist_node_nodeid_seq RESTART 50; +SELECT 1 FROM master_remove_node('localhost', :worker_1_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT 1 FROM master_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT 1 FROM master_add_node('localhost', :worker_1_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT 1 FROM master_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +ALTER SYSTEM SET citus.background_task_queue_interval TO '1s'; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +/* Colocation group 1: create two tables table1_colg1, table2_colg1 and in a colocation group */ +CREATE TABLE table1_colg1 (a int PRIMARY KEY); +SELECT create_distributed_table('table1_colg1', 'a', shard_count => 4 , colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE table2_colg1 (b int PRIMARY KEY); +SELECT create_distributed_table('table2_colg1', 'b' , colocate_with => 'table1_colg1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +/* Colocation group 2: create two tables table1_colg2, table2_colg2 and in a colocation group */ +CREATE TABLE table1_colg2 (a int PRIMARY KEY); +SELECT create_distributed_table('table1_colg2 ', 'a', shard_count => 4, colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE table2_colg2 (b int primary key); +SELECT create_distributed_table('table2_colg2', 'b' , colocate_with => 'table1_colg2'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +/* Colocation group 3: create two tables table1_colg3, table2_colg3 and in a colocation group */ +CREATE TABLE table1_colg3 (a int PRIMARY KEY); +SELECT create_distributed_table('table1_colg3 ', 'a', shard_count => 4, colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE table2_colg3 (b int primary key); +SELECT create_distributed_table('table2_colg3', 'b' , colocate_with => 'table1_colg3'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +/* Add two new node so that we can rebalance */ +SELECT 1 FROM citus_add_node('localhost', :worker_3_port); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +SELECT 1 FROM citus_add_node('localhost', :worker_4_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan() ORDER BY shardid; + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + table1_colg1 | 85674000 | 0 | localhost | 57637 | localhost | 57640 + table1_colg1 | 85674001 | 0 | localhost | 57638 | localhost | 57639 + table2_colg1 | 85674004 | 0 | localhost | 57637 | localhost | 57640 + table2_colg1 | 85674005 | 0 | localhost | 57638 | localhost | 57639 + table1_colg2 | 85674008 | 0 | localhost | 57637 | localhost | 57640 + table1_colg2 | 85674009 | 0 | localhost | 57638 | localhost | 57639 + table2_colg2 | 85674012 | 0 | localhost | 57637 | localhost | 57640 + table2_colg2 | 85674013 | 0 | localhost | 57638 | localhost | 57639 + table1_colg3 | 85674016 | 0 | localhost | 57637 | localhost | 57640 + table1_colg3 | 85674017 | 0 | localhost | 57638 | localhost | 57639 + table2_colg3 | 85674020 | 0 | localhost | 57637 | localhost | 57640 + table2_colg3 | 85674021 | 0 | localhost | 57638 | localhost | 57639 +(12 rows) + +SELECT * FROM citus_rebalance_start(); + citus_rebalance_start +--------------------------------------------------------------------- + 17777 +(1 row) + +SELECT citus_rebalance_wait(); + citus_rebalance_wait +--------------------------------------------------------------------- + +(1 row) + +/*Check that a move is dependent on + 1. any other move scheduled earlier in its colocation group. + 2. any other move scheduled earlier whose source node or target + node overlaps with the current moves nodes. 
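   For instance, in the dependency listing further below: task 1001 (moving shard 85674000, colocation group 50050) depends on task 1000, which moves shard 85674001 from the same group; task 1003 (moving shard 85674008 of group 50051 from node group 50 to 53) depends both on task 1002, a move from its own colocation group, and on task 1001, which uses the same source and target node groups (50 and 53).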
*/ +SELECT S.shardid, P.colocationid +FROM pg_dist_shard S, pg_dist_partition P +WHERE S.logicalrelid = P.logicalrelid ORDER BY S.shardid ASC; + shardid | colocationid +--------------------------------------------------------------------- + 85674000 | 50050 + 85674001 | 50050 + 85674002 | 50050 + 85674003 | 50050 + 85674004 | 50050 + 85674005 | 50050 + 85674006 | 50050 + 85674007 | 50050 + 85674008 | 50051 + 85674009 | 50051 + 85674010 | 50051 + 85674011 | 50051 + 85674012 | 50051 + 85674013 | 50051 + 85674014 | 50051 + 85674015 | 50051 + 85674016 | 50052 + 85674017 | 50052 + 85674018 | 50052 + 85674019 | 50052 + 85674020 | 50052 + 85674021 | 50052 + 85674022 | 50052 + 85674023 | 50052 +(24 rows) + +SELECT D.task_id, + (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), + D.depends_on, + (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) +FROM pg_dist_background_task_depend D WHERE job_id = 17777 ORDER BY D.task_id, D.depends_on ASC; + task_id | command | depends_on | command +--------------------------------------------------------------------- + 1001 | SELECT pg_catalog.citus_move_shard_placement(85674000,50,53,'auto') | 1000 | SELECT pg_catalog.citus_move_shard_placement(85674001,51,52,'auto') + 1002 | SELECT pg_catalog.citus_move_shard_placement(85674009,51,52,'auto') | 1000 | SELECT pg_catalog.citus_move_shard_placement(85674001,51,52,'auto') + 1003 | SELECT pg_catalog.citus_move_shard_placement(85674008,50,53,'auto') | 1001 | SELECT pg_catalog.citus_move_shard_placement(85674000,50,53,'auto') + 1003 | SELECT pg_catalog.citus_move_shard_placement(85674008,50,53,'auto') | 1002 | SELECT pg_catalog.citus_move_shard_placement(85674009,51,52,'auto') + 1004 | SELECT pg_catalog.citus_move_shard_placement(85674017,51,52,'auto') | 1002 | SELECT pg_catalog.citus_move_shard_placement(85674009,51,52,'auto') + 1005 | SELECT pg_catalog.citus_move_shard_placement(85674016,50,53,'auto') | 1003 | SELECT pg_catalog.citus_move_shard_placement(85674008,50,53,'auto') + 1005 | SELECT pg_catalog.citus_move_shard_placement(85674016,50,53,'auto') | 1004 | SELECT pg_catalog.citus_move_shard_placement(85674017,51,52,'auto') +(7 rows) + +/* Check that if there is a reference table that needs to be synched to a node, + any move without a dependency must depend on the move task for reference table. */ +SELECT 1 FROM citus_drain_node('localhost',:worker_4_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT public.wait_for_resource_cleanup(); + wait_for_resource_cleanup +--------------------------------------------------------------------- + +(1 row) + +SELECT 1 FROM citus_disable_node('localhost', :worker_4_port, synchronous:=true); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +/* Drain worker_3 so that we can move only one colocation group to worker_3 + to create an unbalance that would cause parallel rebalancing. */ +SELECT 1 FROM citus_drain_node('localhost',:worker_3_port); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +SELECT citus_set_node_property('localhost', :worker_3_port, 'shouldhaveshards', true); + citus_set_node_property +--------------------------------------------------------------------- + +(1 row) + +CALL citus_cleanup_orphaned_resources(); +CREATE TABLE ref_table(a int PRIMARY KEY); +SELECT create_reference_table('ref_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +/* Move all the shards of Colocation group 3 to worker_3.*/ +SELECT +master_move_shard_placement(shardid, 'localhost', nodeport, 'localhost', :worker_3_port, 'block_writes') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'table1_colg3'::regclass AND nodeport <> :worker_3_port +ORDER BY + shardid; + master_move_shard_placement +--------------------------------------------------------------------- + + + + +(4 rows) + +CALL citus_cleanup_orphaned_resources(); +/* Activate and new nodes so that we can rebalance. */ +SELECT 1 FROM citus_activate_node('localhost', :worker_4_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT citus_set_node_property('localhost', :worker_4_port, 'shouldhaveshards', true); + citus_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT 1 FROM citus_add_node('localhost', :worker_5_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT 1 FROM citus_add_node('localhost', :worker_6_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT * FROM citus_rebalance_start(); + citus_rebalance_start +--------------------------------------------------------------------- + 17778 +(1 row) + +SELECT citus_rebalance_wait(); + citus_rebalance_wait +--------------------------------------------------------------------- + +(1 row) + +SELECT S.shardid, P.colocationid +FROM pg_dist_shard S, pg_dist_partition P +WHERE S.logicalrelid = P.logicalrelid ORDER BY S.shardid ASC; + shardid | colocationid +--------------------------------------------------------------------- + 85674000 | 50050 + 85674001 | 50050 + 85674002 | 50050 + 85674003 | 50050 + 85674004 | 50050 + 85674005 | 50050 + 85674006 | 50050 + 85674007 | 50050 + 85674008 | 50051 + 85674009 | 50051 + 85674010 | 50051 + 85674011 | 50051 + 85674012 | 50051 + 85674013 | 50051 + 85674014 | 50051 + 85674015 | 50051 + 85674016 | 50052 + 85674017 | 50052 + 85674018 | 50052 + 85674019 | 50052 + 85674020 | 50052 + 85674021 | 50052 + 85674022 | 50052 + 85674023 | 50052 + 85674024 | 50053 +(25 rows) + +SELECT D.task_id, + (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), + D.depends_on, + (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) +FROM pg_dist_background_task_depend D WHERE job_id = 17778 ORDER BY D.task_id, D.depends_on ASC; + task_id | command | depends_on | command +--------------------------------------------------------------------- + 1007 | SELECT pg_catalog.citus_move_shard_placement(85674016,52,53,'auto') | 1006 | SELECT pg_catalog.replicate_reference_tables('auto') + 1008 | SELECT pg_catalog.citus_move_shard_placement(85674003,51,54,'auto') | 1006 | SELECT pg_catalog.replicate_reference_tables('auto') + 1009 | SELECT pg_catalog.citus_move_shard_placement(85674000,50,55,'auto') | 1008 | SELECT 
pg_catalog.citus_move_shard_placement(85674003,51,54,'auto') + 1010 | SELECT pg_catalog.citus_move_shard_placement(85674017,52,53,'auto') | 1007 | SELECT pg_catalog.citus_move_shard_placement(85674016,52,53,'auto') + 1011 | SELECT pg_catalog.citus_move_shard_placement(85674008,51,54,'auto') | 1008 | SELECT pg_catalog.citus_move_shard_placement(85674003,51,54,'auto') + 1012 | SELECT pg_catalog.citus_move_shard_placement(85674001,50,55,'auto') | 1009 | SELECT pg_catalog.citus_move_shard_placement(85674000,50,55,'auto') +(6 rows) + +DROP SCHEMA background_rebalance_parallel CASCADE; +TRUNCATE pg_dist_background_job CASCADE; +SELECT public.wait_for_resource_cleanup(); + wait_for_resource_cleanup +--------------------------------------------------------------------- + +(1 row) + +select citus_remove_node('localhost', :worker_3_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +select citus_remove_node('localhost', :worker_4_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +select citus_remove_node('localhost', :worker_5_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +select citus_remove_node('localhost', :worker_6_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +-- keep the rest of the tests inact that depends node/group ids +ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART :last_group_id_cls; +ALTER SEQUENCE pg_catalog.pg_dist_node_nodeid_seq RESTART :last_node_id_cls; diff --git a/src/test/regress/expected/citus_non_blocking_split_shards.out b/src/test/regress/expected/citus_non_blocking_split_shards.out index d6dde8b7a..fe3cade55 100644 --- a/src/test/regress/expected/citus_non_blocking_split_shards.out +++ b/src/test/regress/expected/citus_non_blocking_split_shards.out @@ -60,7 +60,7 @@ SELECT create_reference_table('reference_table'); (1 row) -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); create_distributed_table @@ -84,8 +84,9 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- END : Create Foreign key constraints. -- BEGIN : Load data into tables. 
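The change in this hunk (and the matching hunk in citus_split_shard_by_split_points.out further below) widens colocated_dist_table so that shard splits are exercised against a stored generated column plus a column that is dropped after the data load, as the loading block that follows shows. A minimal standalone sketch of the same shape, assuming an existing Citus cluster; the table name split_demo is made up, and only calls already visible in this diff are used:

CREATE TABLE split_demo (
    measureid integer PRIMARY KEY,
    genid integer GENERATED ALWAYS AS (measureid + 3) STORED, -- computed, so never listed in an INSERT target list
    value varchar(44),
    col_todrop integer
);
SELECT create_distributed_table('split_demo', 'measureid');
INSERT INTO split_demo (measureid, value, col_todrop)
    SELECT i, 'Value', i FROM generate_series(0, 1000) i;
-- dropping a column leaves a dropped-attribute slot behind, which later splits/moves have to copy around correctly
ALTER TABLE split_demo DROP COLUMN col_todrop;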
INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; SELECT COUNT(*) FROM sensors; count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/citus_split_shard_by_split_points.out b/src/test/regress/expected/citus_split_shard_by_split_points.out index 87f50da31..13f3b7a36 100644 --- a/src/test/regress/expected/citus_split_shard_by_split_points.out +++ b/src/test/regress/expected/citus_split_shard_by_split_points.out @@ -56,7 +56,7 @@ SELECT create_reference_table('reference_table'); (1 row) -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); create_distributed_table @@ -80,8 +80,9 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- END : Create Foreign key constraints. -- BEGIN : Load data into tables. INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; SELECT COUNT(*) FROM sensors; count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/citus_split_shard_by_split_points_deferred_drop.out b/src/test/regress/expected/citus_split_shard_by_split_points_deferred_drop.out index fc66118ca..5c1341ba1 100644 --- a/src/test/regress/expected/citus_split_shard_by_split_points_deferred_drop.out +++ b/src/test/regress/expected/citus_split_shard_by_split_points_deferred_drop.out @@ -28,7 +28,7 @@ SET citus.next_placement_id TO 8610000; SET citus.shard_count TO 2; SET citus.shard_replication_factor TO 1; SET citus.next_operation_id TO 777; -SET citus.next_cleanup_record_id TO 511; +ALTER SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq RESTART 511; SET ROLE test_split_role; SET search_path TO "citus_split_shard_by_split_points_deferred_schema"; CREATE TABLE table_to_split(id int PRIMARY KEY, int_data int, data text); @@ -64,10 +64,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( -- The original shard is marked for deferred drop with policy_type = 2. 
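Policy type 2 in pg_dist_cleanup marks the old placement for deferred removal rather than an immediate drop, so the record stays until cleanup actually runs. As a rough illustration, both of the calls below appear elsewhere in this diff; wait_for_resource_cleanup looks like a regression-test helper rather than a user-facing API:

CALL citus_cleanup_orphaned_resources();   -- run the cleanup immediately
SELECT public.wait_for_resource_cleanup(); -- or wait for the background maintenance pass to do it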
-- The previous shard should be dropped at the beginning of the second split call -SELECT * from pg_dist_cleanup; +SELECT * FROM pg_dist_cleanup WHERE policy_type = 2; record_id | operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- - 512 | 778 | 1 | citus_split_shard_by_split_points_deferred_schema.table_to_split_8981001 | 16 | 2 + 526 | 778 | 1 | citus_split_shard_by_split_points_deferred_schema.table_to_split_8981001 | 16 | 2 (1 row) -- One of the physical shards should not be deleted, the other one should. @@ -90,8 +90,12 @@ SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind -- Perform deferred drop cleanup. \c - postgres - :master_port -CALL citus_cleanup_orphaned_resources(); -NOTICE: cleaned up 1 orphaned resources +SELECT public.wait_for_resource_cleanup(); + wait_for_resource_cleanup +--------------------------------------------------------------------- + +(1 row) + -- Clenaup has been done. SELECT * from pg_dist_cleanup; record_id | operation_id | object_type | object_name | node_group_id | policy_type diff --git a/src/test/regress/expected/citus_update_table_statistics.out b/src/test/regress/expected/citus_update_table_statistics.out index 69676c1bf..031104c53 100644 --- a/src/test/regress/expected/citus_update_table_statistics.out +++ b/src/test/regress/expected/citus_update_table_statistics.out @@ -64,11 +64,11 @@ SET citus.multi_shard_modify_mode TO sequential; SELECT citus_update_table_statistics('test_table_statistics_hash'); NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing SELECT 981000 AS shard_id, 'public.test_table_statistics_hash_981000' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981000') UNION ALL SELECT 981001 AS shard_id, 'public.test_table_statistics_hash_981001' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981001') UNION ALL SELECT 981002 AS shard_id, 'public.test_table_statistics_hash_981002' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981002') UNION ALL SELECT 981003 AS shard_id, 'public.test_table_statistics_hash_981003' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981003') UNION ALL SELECT 981004 AS shard_id, 'public.test_table_statistics_hash_981004' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981004') UNION ALL SELECT 981005 AS shard_id, 'public.test_table_statistics_hash_981005' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981005') UNION ALL SELECT 981006 AS shard_id, 'public.test_table_statistics_hash_981006' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981006') UNION ALL SELECT 981007 AS shard_id, 'public.test_table_statistics_hash_981007' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981007') UNION ALL SELECT 0::bigint, NULL::text, 0::bigint; +NOTICE: issuing SELECT 981000 AS shard_id, 'public.test_table_statistics_hash_981000' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981000') UNION ALL SELECT 981001 AS shard_id, 'public.test_table_statistics_hash_981001' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981001') UNION ALL SELECT 981002 AS shard_id, 'public.test_table_statistics_hash_981002' AS shard_name, 
pg_total_relation_size('public.test_table_statistics_hash_981002') UNION ALL SELECT 981003 AS shard_id, 'public.test_table_statistics_hash_981003' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981003') UNION ALL SELECT 981004 AS shard_id, 'public.test_table_statistics_hash_981004' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981004') UNION ALL SELECT 981005 AS shard_id, 'public.test_table_statistics_hash_981005' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981005') UNION ALL SELECT 981006 AS shard_id, 'public.test_table_statistics_hash_981006' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981006') UNION ALL SELECT 981007 AS shard_id, 'public.test_table_statistics_hash_981007' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981007') UNION ALL SELECT 0::bigint, NULL::text, 0::bigint; DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing SELECT 981000 AS shard_id, 'public.test_table_statistics_hash_981000' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981000') UNION ALL SELECT 981001 AS shard_id, 'public.test_table_statistics_hash_981001' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981001') UNION ALL SELECT 981002 AS shard_id, 'public.test_table_statistics_hash_981002' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981002') UNION ALL SELECT 981003 AS shard_id, 'public.test_table_statistics_hash_981003' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981003') UNION ALL SELECT 981004 AS shard_id, 'public.test_table_statistics_hash_981004' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981004') UNION ALL SELECT 981005 AS shard_id, 'public.test_table_statistics_hash_981005' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981005') UNION ALL SELECT 981006 AS shard_id, 'public.test_table_statistics_hash_981006' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981006') UNION ALL SELECT 981007 AS shard_id, 'public.test_table_statistics_hash_981007' AS shard_name, pg_relation_size('public.test_table_statistics_hash_981007') UNION ALL SELECT 0::bigint, NULL::text, 0::bigint; +NOTICE: issuing SELECT 981000 AS shard_id, 'public.test_table_statistics_hash_981000' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981000') UNION ALL SELECT 981001 AS shard_id, 'public.test_table_statistics_hash_981001' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981001') UNION ALL SELECT 981002 AS shard_id, 'public.test_table_statistics_hash_981002' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981002') UNION ALL SELECT 981003 AS shard_id, 'public.test_table_statistics_hash_981003' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981003') UNION ALL SELECT 981004 AS shard_id, 'public.test_table_statistics_hash_981004' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981004') UNION ALL SELECT 981005 AS shard_id, 'public.test_table_statistics_hash_981005' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981005') UNION ALL SELECT 981006 AS shard_id, 'public.test_table_statistics_hash_981006' AS 
shard_name, pg_total_relation_size('public.test_table_statistics_hash_981006') UNION ALL SELECT 981007 AS shard_id, 'public.test_table_statistics_hash_981007' AS shard_name, pg_total_relation_size('public.test_table_statistics_hash_981007') UNION ALL SELECT 0::bigint, NULL::text, 0::bigint; DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx NOTICE: issuing COMMIT DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx @@ -152,11 +152,11 @@ SET citus.multi_shard_modify_mode TO sequential; SELECT citus_update_table_statistics('test_table_statistics_append'); NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing SELECT 981008 AS shard_id, 'public.test_table_statistics_append_981008' AS shard_name, pg_relation_size('public.test_table_statistics_append_981008') UNION ALL SELECT 981009 AS shard_id, 'public.test_table_statistics_append_981009' AS shard_name, pg_relation_size('public.test_table_statistics_append_981009') UNION ALL SELECT 0::bigint, NULL::text, 0::bigint; +NOTICE: issuing SELECT 981008 AS shard_id, 'public.test_table_statistics_append_981008' AS shard_name, pg_total_relation_size('public.test_table_statistics_append_981008') UNION ALL SELECT 981009 AS shard_id, 'public.test_table_statistics_append_981009' AS shard_name, pg_total_relation_size('public.test_table_statistics_append_981009') UNION ALL SELECT 0::bigint, NULL::text, 0::bigint; DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing SELECT 981008 AS shard_id, 'public.test_table_statistics_append_981008' AS shard_name, pg_relation_size('public.test_table_statistics_append_981008') UNION ALL SELECT 981009 AS shard_id, 'public.test_table_statistics_append_981009' AS shard_name, pg_relation_size('public.test_table_statistics_append_981009') UNION ALL SELECT 0::bigint, NULL::text, 0::bigint; +NOTICE: issuing SELECT 981008 AS shard_id, 'public.test_table_statistics_append_981008' AS shard_name, pg_total_relation_size('public.test_table_statistics_append_981008') UNION ALL SELECT 981009 AS shard_id, 'public.test_table_statistics_append_981009' AS shard_name, pg_total_relation_size('public.test_table_statistics_append_981009') UNION ALL SELECT 0::bigint, NULL::text, 0::bigint; DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx NOTICE: issuing COMMIT DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx diff --git a/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out b/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out new file mode 100644 index 000000000..3a39f3644 --- /dev/null +++ b/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out @@ -0,0 +1,687 @@ +-- +-- failure_mx_metadata_sync_multi_trans.sql +-- +CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans; +SET SEARCH_PATH = mx_metadata_sync_multi_trans; +SET citus.shard_count TO 2; +SET citus.next_shard_id TO 16000000; +SET citus.shard_replication_factor TO 1; +SET citus.metadata_sync_mode TO 'nontransactional'; +SELECT pg_backend_pid() as pid \gset +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\set VERBOSITY terse +SET 
client_min_messages TO ERROR; +-- Create roles +CREATE ROLE foo1; +CREATE ROLE foo2; +-- Create sequence +CREATE SEQUENCE seq; +-- Create colocated distributed tables +CREATE TABLE dist1 (id int PRIMARY KEY default nextval('seq')); +SELECT create_distributed_table('dist1', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist1 SELECT i FROM generate_series(1,100) i; +CREATE TABLE dist2 (id int PRIMARY KEY default nextval('seq')); +SELECT create_distributed_table('dist2', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist2 SELECT i FROM generate_series(1,100) i; +-- Create a reference table +CREATE TABLE ref (id int UNIQUE); +SELECT create_reference_table('ref'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO ref SELECT i FROM generate_series(1,100) i; +-- Create local tables +CREATE TABLE loc1 (id int PRIMARY KEY); +INSERT INTO loc1 SELECT i FROM generate_series(1,100) i; +CREATE TABLE loc2 (id int REFERENCES loc1(id)); +INSERT INTO loc2 SELECT i FROM generate_series(1,100) i; +SELECT citus_set_coordinator_host('localhost', :master_port); + citus_set_coordinator_host +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_local_table_to_metadata('loc1', cascade_via_foreign_keys => true); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +-- Create partitioned distributed table +CREATE TABLE orders ( + id bigint, + order_time timestamp without time zone NOT NULL, + region_id bigint NOT NULL +) +PARTITION BY RANGE (order_time); +SELECT create_time_partitions( + table_name := 'orders', + partition_interval := '1 day', + start_from := '2020-01-01', + end_at := '2020-01-11' +); + create_time_partitions +--------------------------------------------------------------------- + t +(1 row) + +SELECT create_distributed_table('orders', 'region_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Initially turn metadata sync to worker2 off because we'll ingest errors to start/stop metadata sync operations +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +SELECT isactive, metadatasynced, hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + isactive | metadatasynced | hasmetadata +--------------------------------------------------------------------- + t | f | f +(1 row) + +-- Failure to send local group id +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_local_group SET groupid").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_local_group SET groupid").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to drop node metadata +SELECT 
citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_node").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_node").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to send node metadata +SELECT citus.mitmproxy('conn.onQuery(query="INSERT INTO pg_dist_node").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="INSERT INTO pg_dist_node").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to drop sequence +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.worker_drop_sequence_dependency").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.worker_drop_sequence_dependency").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to drop shell table +SELECT citus.mitmproxy('conn.onQuery(query="CALL pg_catalog.worker_drop_all_shell_tables").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CALL pg_catalog.worker_drop_all_shell_tables").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_partition metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_partition").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_partition").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_shard 
metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_shard").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_shard").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_placement metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_placement").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_placement").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_object metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_object").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_object").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_colocation metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_colocation").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_colocation").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to alter or create role +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_create_or_alter_role").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_create_or_alter_role").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: 
connection not open +-- Failure to set database owner +SELECT citus.mitmproxy('conn.onQuery(query="ALTER DATABASE.*OWNER TO").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="ALTER DATABASE.*OWNER TO").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create schema +SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans AUTHORIZATION").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans AUTHORIZATION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create sequence +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_sequence_command").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_sequence_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create distributed table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.dist1").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.dist1").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create reference table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.ref").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.ref").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', 
:worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create local table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.loc1").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.loc1").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create distributed partitioned table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create distributed partition table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders_p2020_01_05").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders_p2020_01_05").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to attach partition +SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE mx_metadata_sync_multi_trans.orders ATTACH PARTITION mx_metadata_sync_multi_trans.orders_p2020_01_05").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE mx_metadata_sync_multi_trans.orders ATTACH PARTITION mx_metadata_sync_multi_trans.orders_p2020_01_05").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add partition metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_partition_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', 
:worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_partition_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add shard metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_shard_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_shard_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add placement metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_placement_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_placement_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add colocation metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.citus_internal_add_colocation_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.citus_internal_add_colocation_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add distributed object metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_object_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_object_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to set isactive to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET isactive = TRUE").cancel(' || :pid || ')'); + mitmproxy 
+--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET isactive = TRUE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection not open +-- Failure to set metadatasynced to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET metadatasynced = TRUE").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET metadatasynced = TRUE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection not open +-- Failure to set hasmetadata to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET hasmetadata = TRUE").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET hasmetadata = TRUE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection not open +-- Show node metadata info on coordinator after failures +SELECT * FROM pg_dist_node ORDER BY nodeport; + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards +--------------------------------------------------------------------- + 4 | 4 | localhost | 9060 | default | f | t | primary | default | f | t + 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f + 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t +(3 rows) + +-- Show that we can still query the node from coordinator +SELECT COUNT(*) FROM dist1; + count +--------------------------------------------------------------------- + 100 +(1 row) + +-- Verify that the value 103 belongs to a shard at the node to which we failed to sync metadata +SELECT 103 AS failed_node_val \gset +SELECT nodeid AS failed_nodeid FROM pg_dist_node WHERE metadatasynced = false \gset +SELECT get_shard_id_for_distribution_column('dist1', :failed_node_val) AS shardid \gset +SELECT groupid = :failed_nodeid FROM pg_dist_placement WHERE shardid = :shardid; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +-- Show that we can still insert into a shard at the node from coordinator +INSERT INTO dist1 VALUES (:failed_node_val); +-- Show that we can still update a shard at the node from coordinator +UPDATE dist1 SET id = :failed_node_val WHERE id = :failed_node_val; +-- Show that we can still delete from a shard at the node from coordinator +DELETE FROM dist1 WHERE id = :failed_node_val; +-- Show that DDL would still propagate to the node +SET client_min_messages TO NOTICE; +SET citus.log_remote_commands TO 1; +CREATE SCHEMA dummy; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +NOTICE: issuing SET citus.enable_ddl_propagation TO 'off' +NOTICE: issuing CREATE SCHEMA dummy +NOTICE: issuing SET citus.enable_ddl_propagation TO 'on' +NOTICE: issuing SET citus.enable_ddl_propagation TO 'off' +NOTICE: issuing CREATE SCHEMA dummy +NOTICE: issuing SET citus.enable_ddl_propagation TO 'on' +NOTICE: issuing WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['dummy']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +SET citus.log_remote_commands TO 0; +SET client_min_messages TO ERROR; +-- Successfully activate the node after many failures +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + citus_activate_node +--------------------------------------------------------------------- + 4 +(1 row) + +-- Activate the node once more to verify it works again with already synced metadata +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + citus_activate_node +--------------------------------------------------------------------- + 4 +(1 row) + +-- Show node metadata info on worker2 and coordinator after success +\c - - - :worker_2_port +SELECT * FROM pg_dist_node ORDER BY nodeport; + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards +--------------------------------------------------------------------- + 4 | 4 | localhost | 9060 | default | t | t | primary | default | t | t + 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f + 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t +(3 rows) + +\c - - - :master_port +SELECT * FROM pg_dist_node ORDER BY nodeport; + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards +--------------------------------------------------------------------- + 4 | 4 | localhost | 9060 | default | t | t | primary | default | t | t + 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f + 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t +(3 
rows) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +RESET citus.metadata_sync_mode; +DROP SCHEMA dummy; +DROP SCHEMA mx_metadata_sync_multi_trans CASCADE; +NOTICE: drop cascades to 10 other objects +DROP ROLE foo1; +DROP ROLE foo2; +SELECT citus_remove_node('localhost', :master_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/generated_identity.out b/src/test/regress/expected/generated_identity.out index 23a87af3d..865012af0 100644 --- a/src/test/regress/expected/generated_identity.out +++ b/src/test/regress/expected/generated_identity.out @@ -1,525 +1,431 @@ +-- This test file has an alternative output because of error messages vary for PG13 +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int <= 13 AS server_version_le_13; + server_version_le_13 +--------------------------------------------------------------------- + f +(1 row) + CREATE SCHEMA generated_identities; SET search_path TO generated_identities; SET client_min_messages to ERROR; +SET citus.shard_replication_factor TO 1; SELECT 1 from citus_add_node('localhost', :master_port, groupId=>0); ?column? --------------------------------------------------------------------- 1 (1 row) -DROP TABLE IF EXISTS generated_identities_test; --- create a partitioned table for testing. -CREATE TABLE generated_identities_test ( - a int CONSTRAINT myconname GENERATED BY DEFAULT AS IDENTITY, - b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10), - c smallint GENERATED BY DEFAULT AS IDENTITY, - d serial, - e bigserial, - f smallserial, - g int -) -PARTITION BY RANGE (a); -CREATE TABLE generated_identities_test_1_5 PARTITION OF generated_identities_test FOR VALUES FROM (1) TO (5); -CREATE TABLE generated_identities_test_5_50 PARTITION OF generated_identities_test FOR VALUES FROM (5) TO (50); --- local tables -SELECT citus_add_local_table_to_metadata('generated_identities_test'); +-- smallint identity column can not be distributed +CREATE TABLE smallint_identity_column ( + a smallint GENERATED BY DEFAULT AS IDENTITY +); +SELECT create_distributed_table('smallint_identity_column', 'a'); +ERROR: cannot complete operation on generated_identities.smallint_identity_column with smallint/int identity column +HINT: Use bigint identity column instead. +SELECT create_distributed_table_concurrently('smallint_identity_column', 'a'); +ERROR: cannot complete operation on generated_identities.smallint_identity_column with smallint/int identity column +HINT: Use bigint identity column instead. 
+SELECT create_reference_table('smallint_identity_column'); +ERROR: cannot complete operation on a table with identity column +SELECT citus_add_local_table_to_metadata('smallint_identity_column'); citus_add_local_table_to_metadata --------------------------------------------------------------------- (1 row) -\d generated_identities_test - Partitioned table "generated_identities.generated_identities_test" - Column | Type | Collation | Nullable | Default ---------------------------------------------------------------------- - a | integer | | not null | generated by default as identity - b | bigint | | not null | generated always as identity - c | smallint | | not null | generated by default as identity - d | integer | | not null | nextval('generated_identities_test_d_seq'::regclass) - e | bigint | | not null | nextval('generated_identities_test_e_seq'::regclass) - f | smallint | | not null | nextval('generated_identities_test_f_seq'::regclass) - g | integer | | | -Partition key: RANGE (a) -Number of partitions: 2 (Use \d+ to list them.) - -\c - - - :worker_1_port -\d generated_identities.generated_identities_test - Partitioned table "generated_identities.generated_identities_test" - Column | Type | Collation | Nullable | Default ---------------------------------------------------------------------- - a | integer | | not null | worker_nextval('generated_identities.generated_identities_test_a_seq'::regclass) - b | bigint | | not null | nextval('generated_identities.generated_identities_test_b_seq'::regclass) - c | smallint | | not null | worker_nextval('generated_identities.generated_identities_test_c_seq'::regclass) - d | integer | | not null | worker_nextval('generated_identities.generated_identities_test_d_seq'::regclass) - e | bigint | | not null | nextval('generated_identities.generated_identities_test_e_seq'::regclass) - f | smallint | | not null | worker_nextval('generated_identities.generated_identities_test_f_seq'::regclass) - g | integer | | | -Partition key: RANGE (a) -Number of partitions: 2 (Use \d+ to list them.) - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -SELECT undistribute_table('generated_identities_test'); - undistribute_table +DROP TABLE smallint_identity_column; +-- int identity column can not be distributed +CREATE TABLE int_identity_column ( + a int GENERATED BY DEFAULT AS IDENTITY +); +SELECT create_distributed_table('int_identity_column', 'a'); +ERROR: cannot complete operation on generated_identities.int_identity_column with smallint/int identity column +HINT: Use bigint identity column instead. +SELECT create_distributed_table_concurrently('int_identity_column', 'a'); +ERROR: cannot complete operation on generated_identities.int_identity_column with smallint/int identity column +HINT: Use bigint identity column instead. 
+SELECT create_reference_table('int_identity_column'); +ERROR: cannot complete operation on a table with identity column +SELECT citus_add_local_table_to_metadata('int_identity_column'); + citus_add_local_table_to_metadata --------------------------------------------------------------------- (1 row) -SELECT citus_remove_node('localhost', :master_port); - citus_remove_node +DROP TABLE int_identity_column; +RESET citus.shard_replication_factor; +CREATE TABLE bigint_identity_column ( + a bigint GENERATED BY DEFAULT AS IDENTITY, + b int +); +SELECT citus_add_local_table_to_metadata('bigint_identity_column'); + citus_add_local_table_to_metadata --------------------------------------------------------------------- (1 row) -SELECT create_distributed_table('generated_identities_test', 'a'); +DROP TABLE bigint_identity_column; +CREATE TABLE bigint_identity_column ( + a bigint GENERATED BY DEFAULT AS IDENTITY, + b int +); +SELECT create_distributed_table('bigint_identity_column', 'a'); create_distributed_table --------------------------------------------------------------------- (1 row) -\d generated_identities_test - Partitioned table "generated_identities.generated_identities_test" - Column | Type | Collation | Nullable | Default +\d bigint_identity_column + Table "generated_identities.bigint_identity_column" + Column | Type | Collation | Nullable | Default --------------------------------------------------------------------- - a | integer | | not null | generated by default as identity - b | bigint | | not null | generated always as identity - c | smallint | | not null | generated by default as identity - d | integer | | not null | nextval('generated_identities_test_d_seq'::regclass) - e | bigint | | not null | nextval('generated_identities_test_e_seq'::regclass) - f | smallint | | not null | nextval('generated_identities_test_f_seq'::regclass) - g | integer | | | -Partition key: RANGE (a) -Number of partitions: 2 (Use \d+ to list them.) + a | bigint | | not null | generated by default as identity + b | integer | | | \c - - - :worker_1_port -\d generated_identities.generated_identities_test - Partitioned table "generated_identities.generated_identities_test" - Column | Type | Collation | Nullable | Default +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +INSERT INTO bigint_identity_column (b) +SELECT s FROM generate_series(1,10) s; +\d generated_identities.bigint_identity_column + Table "generated_identities.bigint_identity_column" + Column | Type | Collation | Nullable | Default --------------------------------------------------------------------- - a | integer | | not null | worker_nextval('generated_identities.generated_identities_test_a_seq'::regclass) - b | bigint | | not null | nextval('generated_identities.generated_identities_test_b_seq'::regclass) - c | smallint | | not null | worker_nextval('generated_identities.generated_identities_test_c_seq'::regclass) - d | integer | | not null | worker_nextval('generated_identities.generated_identities_test_d_seq'::regclass) - e | bigint | | not null | nextval('generated_identities.generated_identities_test_e_seq'::regclass) - f | smallint | | not null | worker_nextval('generated_identities.generated_identities_test_f_seq'::regclass) - g | integer | | | -Partition key: RANGE (a) -Number of partitions: 2 (Use \d+ to list them.) 
+ a | bigint | | not null | generated by default as identity + b | integer | | | \c - - - :master_port SET search_path TO generated_identities; SET client_min_messages to ERROR; -insert into generated_identities_test (g) values (1); -insert into generated_identities_test (g) SELECT 2; -INSERT INTO generated_identities_test (g) +INSERT INTO bigint_identity_column (b) +SELECT s FROM generate_series(11,20) s; +SELECT * FROM bigint_identity_column ORDER BY B ASC; + a | b +--------------------------------------------------------------------- + 3940649673949185 | 1 + 3940649673949186 | 2 + 3940649673949187 | 3 + 3940649673949188 | 4 + 3940649673949189 | 5 + 3940649673949190 | 6 + 3940649673949191 | 7 + 3940649673949192 | 8 + 3940649673949193 | 9 + 3940649673949194 | 10 + 1 | 11 + 2 | 12 + 3 | 13 + 4 | 14 + 5 | 15 + 6 | 16 + 7 | 17 + 8 | 18 + 9 | 19 + 10 | 20 +(20 rows) + +-- table with identity column cannot be altered. +SELECT alter_distributed_table('bigint_identity_column', 'b'); +ERROR: cannot complete operation on a table with identity column +-- table with identity column cannot be undistributed. +SELECT undistribute_table('bigint_identity_column'); +ERROR: cannot complete operation on a table with identity column +DROP TABLE bigint_identity_column; +-- create a partitioned table for testing. +CREATE TABLE partitioned_table ( + a bigint CONSTRAINT myconname GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 10), + b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10), + c int +) +PARTITION BY RANGE (c); +CREATE TABLE partitioned_table_1_50 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (50); +CREATE TABLE partitioned_table_50_500 PARTITION OF partitioned_table FOR VALUES FROM (50) TO (1000); +SELECT create_distributed_table('partitioned_table', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +\d partitioned_table + Partitioned table "generated_identities.partitioned_table" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + a | bigint | | not null | generated by default as identity + b | bigint | | not null | generated always as identity + c | integer | | | +Partition key: RANGE (c) +Number of partitions: 2 (Use \d+ to list them.) + +\c - - - :worker_1_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +\d generated_identities.partitioned_table + Partitioned table "generated_identities.partitioned_table" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + a | bigint | | not null | generated by default as identity + b | bigint | | not null | generated always as identity + c | integer | | | +Partition key: RANGE (c) +Number of partitions: 2 (Use \d+ to list them.) 
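In the bigint_identity_column output above, rows inserted from the worker received identity values starting at 3940649673949185 while rows inserted from the coordinator received 1 through 10, i.e. each node draws from its own disjoint slice of the bigint range so no coordination is needed per insert. Citus reportedly achieves this by offsetting each node's sequence so that the high 16 bits carry the node group id; the query below is only a sketch of that arithmetic under that assumption (the 16/48-bit split is an assumption, the presumed_* aliases are made up here, and 3940649673949185 is simply the first worker-generated value from the output above):

SELECT 3940649673949185 >> 48                        AS presumed_node_group_id,
       3940649673949185 & ((1::bigint << 48) - 1)    AS presumed_local_counter;
-- under the assumed 16/48-bit split this returns 14 and 1, matching the first worker-generated row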
+ +insert into partitioned_table (c) values (1); +insert into partitioned_table (c) SELECT 2; +INSERT INTO partitioned_table (c) SELECT s FROM generate_series(3,7) s; -SELECT * FROM generated_identities_test ORDER BY 1; - a | b | c | d | e | f | g ---------------------------------------------------------------------- - 1 | 10 | 1 | 1 | 1 | 1 | 1 - 2 | 20 | 2 | 2 | 2 | 2 | 2 - 3 | 30 | 3 | 3 | 3 | 3 | 3 - 4 | 40 | 4 | 4 | 4 | 4 | 4 - 5 | 50 | 5 | 5 | 5 | 5 | 5 - 6 | 60 | 6 | 6 | 6 | 6 | 6 - 7 | 70 | 7 | 7 | 7 | 7 | 7 -(7 rows) - -SELECT undistribute_table('generated_identities_test'); - undistribute_table ---------------------------------------------------------------------- - -(1 row) - -SELECT * FROM generated_identities_test ORDER BY 1; - a | b | c | d | e | f | g ---------------------------------------------------------------------- - 1 | 10 | 1 | 1 | 1 | 1 | 1 - 2 | 20 | 2 | 2 | 2 | 2 | 2 - 3 | 30 | 3 | 3 | 3 | 3 | 3 - 4 | 40 | 4 | 4 | 4 | 4 | 4 - 5 | 50 | 5 | 5 | 5 | 5 | 5 - 6 | 60 | 6 | 6 | 6 | 6 | 6 - 7 | 70 | 7 | 7 | 7 | 7 | 7 -(7 rows) - -\d generated_identities_test - Partitioned table "generated_identities.generated_identities_test" - Column | Type | Collation | Nullable | Default ---------------------------------------------------------------------- - a | integer | | not null | generated by default as identity - b | bigint | | not null | generated always as identity - c | smallint | | not null | generated by default as identity - d | integer | | not null | nextval('generated_identities_test_d_seq'::regclass) - e | bigint | | not null | nextval('generated_identities_test_e_seq'::regclass) - f | smallint | | not null | nextval('generated_identities_test_f_seq'::regclass) - g | integer | | | -Partition key: RANGE (a) -Number of partitions: 2 (Use \d+ to list them.) - -\c - - - :worker_1_port -\d generated_identities.generated_identities_test \c - - - :master_port SET search_path TO generated_identities; SET client_min_messages to ERROR; -INSERT INTO generated_identities_test (g) -SELECT s FROM generate_series(8,10) s; -SELECT * FROM generated_identities_test ORDER BY 1; - a | b | c | d | e | f | g +INSERT INTO partitioned_table (c) +SELECT s FROM generate_series(10,20) s; +INSERT INTO partitioned_table (a,c) VALUES (998,998); +INSERT INTO partitioned_table (a,b,c) OVERRIDING SYSTEM VALUE VALUES (999,999,999); +SELECT * FROM partitioned_table ORDER BY c ASC; + a | b | c --------------------------------------------------------------------- - 1 | 10 | 1 | 1 | 1 | 1 | 1 - 2 | 20 | 2 | 2 | 2 | 2 | 2 - 3 | 30 | 3 | 3 | 3 | 3 | 3 - 4 | 40 | 4 | 4 | 4 | 4 | 4 - 5 | 50 | 5 | 5 | 5 | 5 | 5 - 6 | 60 | 6 | 6 | 6 | 6 | 6 - 7 | 70 | 7 | 7 | 7 | 7 | 7 - 8 | 80 | 8 | 8 | 8 | 8 | 8 - 9 | 90 | 9 | 9 | 9 | 9 | 9 - 10 | 100 | 10 | 10 | 10 | 10 | 10 -(10 rows) - --- distributed table -SELECT create_distributed_table('generated_identities_test', 'a'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) + 3940649673949185 | 3940649673949185 | 1 + 3940649673949195 | 3940649673949195 | 2 + 3940649673949205 | 3940649673949205 | 3 + 3940649673949215 | 3940649673949215 | 4 + 3940649673949225 | 3940649673949225 | 5 + 3940649673949235 | 3940649673949235 | 6 + 3940649673949245 | 3940649673949245 | 7 + 10 | 10 | 10 + 20 | 20 | 11 + 30 | 30 | 12 + 40 | 40 | 13 + 50 | 50 | 14 + 60 | 60 | 15 + 70 | 70 | 16 + 80 | 80 | 17 + 90 | 90 | 18 + 100 | 100 | 19 + 110 | 110 | 20 + 998 | 120 | 998 + 999 | 999 | 999 +(20 rows) -- alter table .. alter column .. 
add is unsupported -ALTER TABLE generated_identities_test ALTER COLUMN g ADD GENERATED ALWAYS AS IDENTITY; +ALTER TABLE partitioned_table ALTER COLUMN g ADD GENERATED ALWAYS AS IDENTITY; ERROR: alter table command is currently unsupported DETAIL: Only ADD|DROP COLUMN, SET|DROP NOT NULL, SET|DROP DEFAULT, ADD|DROP|VALIDATE CONSTRAINT, SET (), RESET (), ENABLE|DISABLE|NO FORCE|FORCE ROW LEVEL SECURITY, ATTACH|DETACH PARTITION and TYPE subcommands are supported. -- alter table .. alter column is unsupported -ALTER TABLE generated_identities_test ALTER COLUMN b TYPE int; +ALTER TABLE partitioned_table ALTER COLUMN b TYPE int; ERROR: cannot execute ALTER COLUMN command involving identity column -SELECT alter_distributed_table('generated_identities_test', 'g'); - alter_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT alter_distributed_table('generated_identities_test', 'b'); - alter_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT alter_distributed_table('generated_identities_test', 'c'); - alter_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT undistribute_table('generated_identities_test'); - undistribute_table ---------------------------------------------------------------------- - -(1 row) - -SELECT * FROM generated_identities_test ORDER BY g; - a | b | c | d | e | f | g ---------------------------------------------------------------------- - 1 | 10 | 1 | 1 | 1 | 1 | 1 - 2 | 20 | 2 | 2 | 2 | 2 | 2 - 3 | 30 | 3 | 3 | 3 | 3 | 3 - 4 | 40 | 4 | 4 | 4 | 4 | 4 - 5 | 50 | 5 | 5 | 5 | 5 | 5 - 6 | 60 | 6 | 6 | 6 | 6 | 6 - 7 | 70 | 7 | 7 | 7 | 7 | 7 - 8 | 80 | 8 | 8 | 8 | 8 | 8 - 9 | 90 | 9 | 9 | 9 | 9 | 9 - 10 | 100 | 10 | 10 | 10 | 10 | 10 -(10 rows) - --- reference table -DROP TABLE generated_identities_test; -CREATE TABLE generated_identities_test ( - a int GENERATED BY DEFAULT AS IDENTITY, - b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10), - c smallint GENERATED BY DEFAULT AS IDENTITY, - d serial, - e bigserial, - f smallserial, - g int +DROP TABLE partitioned_table; +-- create a table for reference table testing. 
+CREATE TABLE reference_table ( + a bigint CONSTRAINT myconname GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 10), + b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10) UNIQUE, + c int ); -SELECT create_reference_table('generated_identities_test'); +SELECT create_reference_table('reference_table'); create_reference_table --------------------------------------------------------------------- (1 row) -\d generated_identities_test - Table "generated_identities.generated_identities_test" - Column | Type | Collation | Nullable | Default +\d reference_table + Table "generated_identities.reference_table" + Column | Type | Collation | Nullable | Default --------------------------------------------------------------------- - a | integer | | not null | generated by default as identity - b | bigint | | not null | generated always as identity - c | smallint | | not null | generated by default as identity - d | integer | | not null | nextval('generated_identities_test_d_seq'::regclass) - e | bigint | | not null | nextval('generated_identities_test_e_seq'::regclass) - f | smallint | | not null | nextval('generated_identities_test_f_seq'::regclass) - g | integer | | | + a | bigint | | not null | generated by default as identity + b | bigint | | not null | generated always as identity + c | integer | | | +Indexes: + "reference_table_b_key" UNIQUE CONSTRAINT, btree (b) \c - - - :worker_1_port -\d generated_identities.generated_identities_test - Table "generated_identities.generated_identities_test" - Column | Type | Collation | Nullable | Default +SET search_path TO generated_identities; +\d generated_identities.reference_table + Table "generated_identities.reference_table" + Column | Type | Collation | Nullable | Default --------------------------------------------------------------------- - a | integer | | not null | worker_nextval('generated_identities.generated_identities_test_a_seq'::regclass) - b | bigint | | not null | nextval('generated_identities.generated_identities_test_b_seq'::regclass) - c | smallint | | not null | worker_nextval('generated_identities.generated_identities_test_c_seq'::regclass) - d | integer | | not null | worker_nextval('generated_identities.generated_identities_test_d_seq'::regclass) - e | bigint | | not null | nextval('generated_identities.generated_identities_test_e_seq'::regclass) - f | smallint | | not null | worker_nextval('generated_identities.generated_identities_test_f_seq'::regclass) - g | integer | | | + a | bigint | | not null | generated by default as identity + b | bigint | | not null | generated always as identity + c | integer | | | +Indexes: + "reference_table_b_key" UNIQUE CONSTRAINT, btree (b) + +INSERT INTO reference_table (c) +SELECT s FROM generate_series(1,10) s; +--on master +select * from reference_table; + a | b | c +--------------------------------------------------------------------- + 3940649673949185 | 3940649673949185 | 1 + 3940649673949195 | 3940649673949195 | 2 + 3940649673949205 | 3940649673949205 | 3 + 3940649673949215 | 3940649673949215 | 4 + 3940649673949225 | 3940649673949225 | 5 + 3940649673949235 | 3940649673949235 | 6 + 3940649673949245 | 3940649673949245 | 7 + 3940649673949255 | 3940649673949255 | 8 + 3940649673949265 | 3940649673949265 | 9 + 3940649673949275 | 3940649673949275 | 10 +(10 rows) \c - - - :master_port SET search_path TO generated_identities; SET client_min_messages to ERROR; -INSERT INTO generated_identities_test (g) +INSERT INTO reference_table (c) SELECT s FROM generate_series(11,20) s; 
-SELECT * FROM generated_identities_test ORDER BY g; - a | b | c | d | e | f | g +SELECT * FROM reference_table ORDER BY c ASC; + a | b | c --------------------------------------------------------------------- - 1 | 10 | 1 | 1 | 1 | 1 | 11 - 2 | 20 | 2 | 2 | 2 | 2 | 12 - 3 | 30 | 3 | 3 | 3 | 3 | 13 - 4 | 40 | 4 | 4 | 4 | 4 | 14 - 5 | 50 | 5 | 5 | 5 | 5 | 15 - 6 | 60 | 6 | 6 | 6 | 6 | 16 - 7 | 70 | 7 | 7 | 7 | 7 | 17 - 8 | 80 | 8 | 8 | 8 | 8 | 18 - 9 | 90 | 9 | 9 | 9 | 9 | 19 - 10 | 100 | 10 | 10 | 10 | 10 | 20 -(10 rows) + 3940649673949185 | 3940649673949185 | 1 + 3940649673949195 | 3940649673949195 | 2 + 3940649673949205 | 3940649673949205 | 3 + 3940649673949215 | 3940649673949215 | 4 + 3940649673949225 | 3940649673949225 | 5 + 3940649673949235 | 3940649673949235 | 6 + 3940649673949245 | 3940649673949245 | 7 + 3940649673949255 | 3940649673949255 | 8 + 3940649673949265 | 3940649673949265 | 9 + 3940649673949275 | 3940649673949275 | 10 + 10 | 10 | 11 + 20 | 20 | 12 + 30 | 30 | 13 + 40 | 40 | 14 + 50 | 50 | 15 + 60 | 60 | 16 + 70 | 70 | 17 + 80 | 80 | 18 + 90 | 90 | 19 + 100 | 100 | 20 +(20 rows) -SELECT undistribute_table('generated_identities_test'); - undistribute_table +DROP TABLE reference_table; +CREATE TABLE color ( + color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, + color_name VARCHAR NOT NULL +); +-- https://github.com/citusdata/citus/issues/6694 +CREATE USER identity_test_user; +GRANT INSERT ON color TO identity_test_user; +GRANT USAGE ON SCHEMA generated_identities TO identity_test_user; +SET ROLE identity_test_user; +SELECT create_distributed_table('color', 'color_id'); +ERROR: must be owner of table color +SET ROLE postgres; +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table_concurrently('color', 'color_id'); + create_distributed_table_concurrently --------------------------------------------------------------------- (1 row) -\d generated_identities_test - Table "generated_identities.generated_identities_test" - Column | Type | Collation | Nullable | Default +RESET citus.shard_replication_factor; +\c - identity_test_user - :worker_1_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +INSERT INTO color(color_name) VALUES ('Blue'); +\c - postgres - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +SET citus.next_shard_id TO 12400000; +DROP TABLE Color; +CREATE TABLE color ( + color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, + color_name VARCHAR NOT NULL +) USING columnar; +SELECT create_distributed_table('color', 'color_id'); + create_distributed_table --------------------------------------------------------------------- - a | integer | | not null | generated by default as identity - b | bigint | | not null | generated always as identity - c | smallint | | not null | generated by default as identity - d | integer | | not null | nextval('generated_identities_test_d_seq'::regclass) - e | bigint | | not null | nextval('generated_identities_test_e_seq'::regclass) - f | smallint | | not null | nextval('generated_identities_test_f_seq'::regclass) - g | integer | | | + +(1 row) + +INSERT INTO color(color_name) VALUES ('Blue'); +\d+ color + Table "generated_identities.color" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------------------------------------------------------------------- + color_id | bigint | | not null | generated always as identity | plain | | + color_name | character varying | | not null | | extended | | +Indexes: + 
"color_color_id_key" UNIQUE CONSTRAINT, btree (color_id) \c - - - :worker_1_port -\d generated_identities.generated_identities_test -\c - - - :master_port +SET search_path TO generated_identities; +\d+ color + Table "generated_identities.color" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------------------------------------------------------------------- + color_id | bigint | | not null | generated always as identity | plain | | + color_name | character varying | | not null | | extended | | +Indexes: + "color_color_id_key" UNIQUE CONSTRAINT, btree (color_id) + +INSERT INTO color(color_name) VALUES ('Red'); +-- alter sequence .. restart +ALTER SEQUENCE color_color_id_seq RESTART WITH 1000; +ERROR: Altering a distributed sequence is currently not supported. +-- override system value +INSERT INTO color(color_id, color_name) VALUES (1, 'Red'); +ERROR: cannot insert a non-DEFAULT value into column "color_id" +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. +INSERT INTO color(color_id, color_name) VALUES (NULL, 'Red'); +ERROR: cannot insert a non-DEFAULT value into column "color_id" +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. +INSERT INTO color(color_id, color_name) OVERRIDING SYSTEM VALUE VALUES (1, 'Red'); +ERROR: duplicate key value violates unique constraint "color_color_id_key_12400000" +DETAIL: Key (color_id)=(1) already exists. +CONTEXT: while executing command on localhost:xxxxx +-- update null or custom value +UPDATE color SET color_id = NULL; +ERROR: column "color_id" can only be updated to DEFAULT +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +UPDATE color SET color_id = 1; +ERROR: column "color_id" can only be updated to DEFAULT +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +\c - postgres - :master_port SET search_path TO generated_identities; SET client_min_messages to ERROR; -- alter table .. add column .. GENERATED .. AS IDENTITY -DROP TABLE IF EXISTS color; -CREATE TABLE color ( - color_name VARCHAR NOT NULL -); -SELECT create_distributed_table('color', 'color_name'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - ALTER TABLE color ADD COLUMN color_id BIGINT GENERATED ALWAYS AS IDENTITY; -INSERT INTO color(color_name) VALUES ('Red'); -ALTER TABLE color ADD COLUMN color_id_1 BIGINT GENERATED ALWAYS AS IDENTITY; -ERROR: Cannot add an identity column because the table is not empty -DROP TABLE color; --- insert data from workers -CREATE TABLE color ( - color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, - color_name VARCHAR NOT NULL -); -SELECT create_distributed_table('color', 'color_id'); +ERROR: cannot execute ADD COLUMN commands involving identity columns when metadata is synchronized to workers +-- alter sequence .. restart +ALTER SEQUENCE color_color_id_seq RESTART WITH 1000; +ERROR: Altering a distributed sequence is currently not supported. +-- override system value +INSERT INTO color(color_id, color_name) VALUES (1, 'Red'); +ERROR: cannot insert a non-DEFAULT value into column "color_id" +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. 
+INSERT INTO color(color_id, color_name) VALUES (NULL, 'Red'); +ERROR: cannot insert a non-DEFAULT value into column "color_id" +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. +INSERT INTO color(color_id, color_name) OVERRIDING SYSTEM VALUE VALUES (1, 'Red'); +ERROR: duplicate key value violates unique constraint "color_color_id_key_12400000" +DETAIL: Key (color_id)=(1) already exists. +CONTEXT: while executing command on localhost:xxxxx +-- update null or custom value +UPDATE color SET color_id = NULL; +ERROR: column "color_id" can only be updated to DEFAULT +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +UPDATE color SET color_id = 1; +ERROR: column "color_id" can only be updated to DEFAULT +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +DROP TABLE IF EXISTS test; +CREATE TABLE test (x int, y int, z bigint generated by default as identity); +SELECT create_distributed_table('test', 'x', colocate_with := 'none'); create_distributed_table --------------------------------------------------------------------- (1 row) -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -INSERT INTO color(color_name) VALUES ('Red'); -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -SELECT undistribute_table('color'); - undistribute_table +INSERT INTO test VALUES (1,2); +INSERT INTO test SELECT x, y FROM test WHERE x = 1; +SELECT * FROM test; + x | y | z --------------------------------------------------------------------- + 1 | 2 | 1 + 1 | 2 | 2 +(2 rows) -(1 row) - -SELECT create_distributed_table('color', 'color_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -INSERT INTO color(color_name) VALUES ('Red'); -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -INSERT INTO color(color_name) VALUES ('Red'); -SELECT count(*) from color; - count ---------------------------------------------------------------------- - 3 -(1 row) - --- modify sequence & alter table -DROP TABLE color; -CREATE TABLE color ( - color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, - color_name VARCHAR NOT NULL -); -SELECT create_distributed_table('color', 'color_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -INSERT INTO color(color_name) VALUES ('Red'); -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -SELECT undistribute_table('color'); - undistribute_table ---------------------------------------------------------------------- - -(1 row) - -ALTER SEQUENCE color_color_id_seq RENAME TO myseq; -SELECT create_distributed_table('color', 'color_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -\ds+ myseq - List of relations - Schema | Name | Type | Owner | Persistence | Size | Description ---------------------------------------------------------------------- - generated_identities | myseq | sequence | postgres | permanent | 8192 bytes | -(1 row) - -\ds+ color_color_id_seq - List of relations - Schema | Name 
| Type | Owner | Persistence | Size | Description ---------------------------------------------------------------------- -(0 rows) - -\d color - Table "generated_identities.color" - Column | Type | Collation | Nullable | Default ---------------------------------------------------------------------- - color_id | bigint | | not null | generated always as identity - color_name | character varying | | not null | -Indexes: - "color_color_id_key" UNIQUE CONSTRAINT, btree (color_id) - -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -\ds+ myseq - List of relations - Schema | Name | Type | Owner | Persistence | Size | Description ---------------------------------------------------------------------- - generated_identities | myseq | sequence | postgres | permanent | 8192 bytes | -(1 row) - -\ds+ color_color_id_seq - List of relations - Schema | Name | Type | Owner | Persistence | Size | Description ---------------------------------------------------------------------- -(0 rows) - -\d color - Table "generated_identities.color" - Column | Type | Collation | Nullable | Default ---------------------------------------------------------------------- - color_id | bigint | | not null | nextval('myseq'::regclass) - color_name | character varying | | not null | -Indexes: - "color_color_id_key" UNIQUE CONSTRAINT, btree (color_id) - -INSERT INTO color(color_name) VALUES ('Red'); -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -ALTER SEQUENCE myseq RENAME TO color_color_id_seq; -\ds+ myseq - List of relations - Schema | Name | Type | Owner | Persistence | Size | Description ---------------------------------------------------------------------- -(0 rows) - -\ds+ color_color_id_seq - List of relations - Schema | Name | Type | Owner | Persistence | Size | Description ---------------------------------------------------------------------- - generated_identities | color_color_id_seq | sequence | postgres | permanent | 8192 bytes | -(1 row) - -INSERT INTO color(color_name) VALUES ('Red'); -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -\ds+ myseq - List of relations - Schema | Name | Type | Owner | Persistence | Size | Description ---------------------------------------------------------------------- -(0 rows) - -\ds+ color_color_id_seq - List of relations - Schema | Name | Type | Owner | Persistence | Size | Description ---------------------------------------------------------------------- - generated_identities | color_color_id_seq | sequence | postgres | permanent | 8192 bytes | -(1 row) - -\d color - Table "generated_identities.color" - Column | Type | Collation | Nullable | Default ---------------------------------------------------------------------- - color_id | bigint | | not null | nextval('color_color_id_seq'::regclass) - color_name | character varying | | not null | -Indexes: - "color_color_id_key" UNIQUE CONSTRAINT, btree (color_id) - -INSERT INTO color(color_name) VALUES ('Red'); -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -SELECT alter_distributed_table('co23423lor', shard_count := 6); -ERROR: relation "co23423lor" does not exist -INSERT INTO color(color_name) VALUES ('Red'); -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; -\ds+ color_color_id_seq - List of relations - Schema | Name | Type | Owner | Persistence | Size | Description 
---------------------------------------------------------------------- - generated_identities | color_color_id_seq | sequence | postgres | permanent | 8192 bytes | -(1 row) - -INSERT INTO color(color_name) VALUES ('Red'); -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; DROP SCHEMA generated_identities CASCADE; +DROP USER identity_test_user; diff --git a/src/test/regress/expected/generated_identity_0.out b/src/test/regress/expected/generated_identity_0.out new file mode 100644 index 000000000..1bff7f68f --- /dev/null +++ b/src/test/regress/expected/generated_identity_0.out @@ -0,0 +1,431 @@ +-- This test file has an alternative output because of error messages vary for PG13 +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int <= 13 AS server_version_le_13; + server_version_le_13 +--------------------------------------------------------------------- + t +(1 row) + +CREATE SCHEMA generated_identities; +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +SET citus.shard_replication_factor TO 1; +SELECT 1 from citus_add_node('localhost', :master_port, groupId=>0); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- smallint identity column can not be distributed +CREATE TABLE smallint_identity_column ( + a smallint GENERATED BY DEFAULT AS IDENTITY +); +SELECT create_distributed_table('smallint_identity_column', 'a'); +ERROR: cannot complete operation on generated_identities.smallint_identity_column with smallint/int identity column +HINT: Use bigint identity column instead. +SELECT create_distributed_table_concurrently('smallint_identity_column', 'a'); +ERROR: cannot complete operation on generated_identities.smallint_identity_column with smallint/int identity column +HINT: Use bigint identity column instead. +SELECT create_reference_table('smallint_identity_column'); +ERROR: cannot complete operation on a table with identity column +SELECT citus_add_local_table_to_metadata('smallint_identity_column'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE smallint_identity_column; +-- int identity column can not be distributed +CREATE TABLE int_identity_column ( + a int GENERATED BY DEFAULT AS IDENTITY +); +SELECT create_distributed_table('int_identity_column', 'a'); +ERROR: cannot complete operation on generated_identities.int_identity_column with smallint/int identity column +HINT: Use bigint identity column instead. +SELECT create_distributed_table_concurrently('int_identity_column', 'a'); +ERROR: cannot complete operation on generated_identities.int_identity_column with smallint/int identity column +HINT: Use bigint identity column instead. 
+SELECT create_reference_table('int_identity_column'); +ERROR: cannot complete operation on a table with identity column +SELECT citus_add_local_table_to_metadata('int_identity_column'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE int_identity_column; +RESET citus.shard_replication_factor; +CREATE TABLE bigint_identity_column ( + a bigint GENERATED BY DEFAULT AS IDENTITY, + b int +); +SELECT citus_add_local_table_to_metadata('bigint_identity_column'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE bigint_identity_column; +CREATE TABLE bigint_identity_column ( + a bigint GENERATED BY DEFAULT AS IDENTITY, + b int +); +SELECT create_distributed_table('bigint_identity_column', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +\d bigint_identity_column + Table "generated_identities.bigint_identity_column" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + a | bigint | | not null | generated by default as identity + b | integer | | | + +\c - - - :worker_1_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +INSERT INTO bigint_identity_column (b) +SELECT s FROM generate_series(1,10) s; +\d generated_identities.bigint_identity_column + Table "generated_identities.bigint_identity_column" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + a | bigint | | not null | generated by default as identity + b | integer | | | + +\c - - - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +INSERT INTO bigint_identity_column (b) +SELECT s FROM generate_series(11,20) s; +SELECT * FROM bigint_identity_column ORDER BY B ASC; + a | b +--------------------------------------------------------------------- + 3940649673949185 | 1 + 3940649673949186 | 2 + 3940649673949187 | 3 + 3940649673949188 | 4 + 3940649673949189 | 5 + 3940649673949190 | 6 + 3940649673949191 | 7 + 3940649673949192 | 8 + 3940649673949193 | 9 + 3940649673949194 | 10 + 1 | 11 + 2 | 12 + 3 | 13 + 4 | 14 + 5 | 15 + 6 | 16 + 7 | 17 + 8 | 18 + 9 | 19 + 10 | 20 +(20 rows) + +-- table with identity column cannot be altered. +SELECT alter_distributed_table('bigint_identity_column', 'b'); +ERROR: cannot complete operation on a table with identity column +-- table with identity column cannot be undistributed. +SELECT undistribute_table('bigint_identity_column'); +ERROR: cannot complete operation on a table with identity column +DROP TABLE bigint_identity_column; +-- create a partitioned table for testing. 
+CREATE TABLE partitioned_table ( + a bigint CONSTRAINT myconname GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 10), + b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10), + c int +) +PARTITION BY RANGE (c); +CREATE TABLE partitioned_table_1_50 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (50); +CREATE TABLE partitioned_table_50_500 PARTITION OF partitioned_table FOR VALUES FROM (50) TO (1000); +SELECT create_distributed_table('partitioned_table', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +\d partitioned_table + Partitioned table "generated_identities.partitioned_table" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + a | bigint | | not null | generated by default as identity + b | bigint | | not null | generated always as identity + c | integer | | | +Partition key: RANGE (c) +Number of partitions: 2 (Use \d+ to list them.) + +\c - - - :worker_1_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +\d generated_identities.partitioned_table + Partitioned table "generated_identities.partitioned_table" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + a | bigint | | not null | generated by default as identity + b | bigint | | not null | generated always as identity + c | integer | | | +Partition key: RANGE (c) +Number of partitions: 2 (Use \d+ to list them.) + +insert into partitioned_table (c) values (1); +insert into partitioned_table (c) SELECT 2; +INSERT INTO partitioned_table (c) +SELECT s FROM generate_series(3,7) s; +\c - - - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +INSERT INTO partitioned_table (c) +SELECT s FROM generate_series(10,20) s; +INSERT INTO partitioned_table (a,c) VALUES (998,998); +INSERT INTO partitioned_table (a,b,c) OVERRIDING SYSTEM VALUE VALUES (999,999,999); +SELECT * FROM partitioned_table ORDER BY c ASC; + a | b | c +--------------------------------------------------------------------- + 3940649673949185 | 3940649673949185 | 1 + 3940649673949195 | 3940649673949195 | 2 + 3940649673949205 | 3940649673949205 | 3 + 3940649673949215 | 3940649673949215 | 4 + 3940649673949225 | 3940649673949225 | 5 + 3940649673949235 | 3940649673949235 | 6 + 3940649673949245 | 3940649673949245 | 7 + 10 | 10 | 10 + 20 | 20 | 11 + 30 | 30 | 12 + 40 | 40 | 13 + 50 | 50 | 14 + 60 | 60 | 15 + 70 | 70 | 16 + 80 | 80 | 17 + 90 | 90 | 18 + 100 | 100 | 19 + 110 | 110 | 20 + 998 | 120 | 998 + 999 | 999 | 999 +(20 rows) + +-- alter table .. alter column .. add is unsupported +ALTER TABLE partitioned_table ALTER COLUMN g ADD GENERATED ALWAYS AS IDENTITY; +ERROR: alter table command is currently unsupported +DETAIL: Only ADD|DROP COLUMN, SET|DROP NOT NULL, SET|DROP DEFAULT, ADD|DROP|VALIDATE CONSTRAINT, SET (), RESET (), ENABLE|DISABLE|NO FORCE|FORCE ROW LEVEL SECURITY, ATTACH|DETACH PARTITION and TYPE subcommands are supported. +-- alter table .. alter column is unsupported +ALTER TABLE partitioned_table ALTER COLUMN b TYPE int; +ERROR: cannot execute ALTER COLUMN command involving identity column +DROP TABLE partitioned_table; +-- create a table for reference table testing. 
+CREATE TABLE reference_table ( + a bigint CONSTRAINT myconname GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 10), + b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10) UNIQUE, + c int +); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +\d reference_table + Table "generated_identities.reference_table" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + a | bigint | | not null | generated by default as identity + b | bigint | | not null | generated always as identity + c | integer | | | +Indexes: + "reference_table_b_key" UNIQUE CONSTRAINT, btree (b) + +\c - - - :worker_1_port +SET search_path TO generated_identities; +\d generated_identities.reference_table + Table "generated_identities.reference_table" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + a | bigint | | not null | generated by default as identity + b | bigint | | not null | generated always as identity + c | integer | | | +Indexes: + "reference_table_b_key" UNIQUE CONSTRAINT, btree (b) + +INSERT INTO reference_table (c) +SELECT s FROM generate_series(1,10) s; +--on master +select * from reference_table; + a | b | c +--------------------------------------------------------------------- + 3940649673949185 | 3940649673949185 | 1 + 3940649673949195 | 3940649673949195 | 2 + 3940649673949205 | 3940649673949205 | 3 + 3940649673949215 | 3940649673949215 | 4 + 3940649673949225 | 3940649673949225 | 5 + 3940649673949235 | 3940649673949235 | 6 + 3940649673949245 | 3940649673949245 | 7 + 3940649673949255 | 3940649673949255 | 8 + 3940649673949265 | 3940649673949265 | 9 + 3940649673949275 | 3940649673949275 | 10 +(10 rows) + +\c - - - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +INSERT INTO reference_table (c) +SELECT s FROM generate_series(11,20) s; +SELECT * FROM reference_table ORDER BY c ASC; + a | b | c +--------------------------------------------------------------------- + 3940649673949185 | 3940649673949185 | 1 + 3940649673949195 | 3940649673949195 | 2 + 3940649673949205 | 3940649673949205 | 3 + 3940649673949215 | 3940649673949215 | 4 + 3940649673949225 | 3940649673949225 | 5 + 3940649673949235 | 3940649673949235 | 6 + 3940649673949245 | 3940649673949245 | 7 + 3940649673949255 | 3940649673949255 | 8 + 3940649673949265 | 3940649673949265 | 9 + 3940649673949275 | 3940649673949275 | 10 + 10 | 10 | 11 + 20 | 20 | 12 + 30 | 30 | 13 + 40 | 40 | 14 + 50 | 50 | 15 + 60 | 60 | 16 + 70 | 70 | 17 + 80 | 80 | 18 + 90 | 90 | 19 + 100 | 100 | 20 +(20 rows) + +DROP TABLE reference_table; +CREATE TABLE color ( + color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, + color_name VARCHAR NOT NULL +); +-- https://github.com/citusdata/citus/issues/6694 +CREATE USER identity_test_user; +GRANT INSERT ON color TO identity_test_user; +GRANT USAGE ON SCHEMA generated_identities TO identity_test_user; +SET ROLE identity_test_user; +SELECT create_distributed_table('color', 'color_id'); +ERROR: must be owner of table color +SET ROLE postgres; +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table_concurrently('color', 'color_id'); + create_distributed_table_concurrently +--------------------------------------------------------------------- + +(1 row) + +RESET citus.shard_replication_factor; +\c - 
identity_test_user - :worker_1_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +INSERT INTO color(color_name) VALUES ('Blue'); +\c - postgres - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +SET citus.next_shard_id TO 12400000; +DROP TABLE Color; +CREATE TABLE color ( + color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, + color_name VARCHAR NOT NULL +) USING columnar; +SELECT create_distributed_table('color', 'color_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO color(color_name) VALUES ('Blue'); +\d+ color + Table "generated_identities.color" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------------------------------------------------------------------- + color_id | bigint | | not null | generated always as identity | plain | | + color_name | character varying | | not null | | extended | | +Indexes: + "color_color_id_key" UNIQUE CONSTRAINT, btree (color_id) + +\c - - - :worker_1_port +SET search_path TO generated_identities; +\d+ color + Table "generated_identities.color" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------------------------------------------------------------------- + color_id | bigint | | not null | generated always as identity | plain | | + color_name | character varying | | not null | | extended | | +Indexes: + "color_color_id_key" UNIQUE CONSTRAINT, btree (color_id) + +INSERT INTO color(color_name) VALUES ('Red'); +-- alter sequence .. restart +ALTER SEQUENCE color_color_id_seq RESTART WITH 1000; +ERROR: Altering a distributed sequence is currently not supported. +-- override system value +INSERT INTO color(color_id, color_name) VALUES (1, 'Red'); +ERROR: cannot insert into column "color_id" +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. +INSERT INTO color(color_id, color_name) VALUES (NULL, 'Red'); +ERROR: cannot insert into column "color_id" +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. +INSERT INTO color(color_id, color_name) OVERRIDING SYSTEM VALUE VALUES (1, 'Red'); +ERROR: duplicate key value violates unique constraint "color_color_id_key_12400000" +DETAIL: Key (color_id)=(1) already exists. +CONTEXT: while executing command on localhost:xxxxx +-- update null or custom value +UPDATE color SET color_id = NULL; +ERROR: column "color_id" can only be updated to DEFAULT +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +UPDATE color SET color_id = 1; +ERROR: column "color_id" can only be updated to DEFAULT +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +\c - postgres - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +-- alter table .. add column .. GENERATED .. AS IDENTITY +ALTER TABLE color ADD COLUMN color_id BIGINT GENERATED ALWAYS AS IDENTITY; +ERROR: cannot execute ADD COLUMN commands involving identity columns when metadata is synchronized to workers +-- alter sequence .. restart +ALTER SEQUENCE color_color_id_seq RESTART WITH 1000; +ERROR: Altering a distributed sequence is currently not supported. 
+-- override system value +INSERT INTO color(color_id, color_name) VALUES (1, 'Red'); +ERROR: cannot insert into column "color_id" +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. +INSERT INTO color(color_id, color_name) VALUES (NULL, 'Red'); +ERROR: cannot insert into column "color_id" +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. +INSERT INTO color(color_id, color_name) OVERRIDING SYSTEM VALUE VALUES (1, 'Red'); +ERROR: duplicate key value violates unique constraint "color_color_id_key_12400000" +DETAIL: Key (color_id)=(1) already exists. +CONTEXT: while executing command on localhost:xxxxx +-- update null or custom value +UPDATE color SET color_id = NULL; +ERROR: column "color_id" can only be updated to DEFAULT +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +UPDATE color SET color_id = 1; +ERROR: column "color_id" can only be updated to DEFAULT +DETAIL: Column "color_id" is an identity column defined as GENERATED ALWAYS. +DROP TABLE IF EXISTS test; +CREATE TABLE test (x int, y int, z bigint generated by default as identity); +SELECT create_distributed_table('test', 'x', colocate_with := 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO test VALUES (1,2); +INSERT INTO test SELECT x, y FROM test WHERE x = 1; +SELECT * FROM test; + x | y | z +--------------------------------------------------------------------- + 1 | 2 | 1 + 1 | 2 | 2 +(2 rows) + +DROP SCHEMA generated_identities CASCADE; +DROP USER identity_test_user; diff --git a/src/test/regress/expected/isolation_merge.out b/src/test/regress/expected/isolation_merge.out new file mode 100644 index 000000000..d78c46c64 --- /dev/null +++ b/src/test/regress/expected/isolation_merge.out @@ -0,0 +1,147 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-upd-ins s2-result s1-commit s2-result +step s1-begin: BEGIN; +step s1-upd-ins: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + +step s2-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +100| 0 +(1 row) + +step s1-commit: COMMIT; +step s2-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +100| 1 +200| 0 +(2 rows) + + +starting permutation: s1-begin s1-upd-ins s2-begin s2-upd-del s1-commit s2-commit s2-result +step s1-begin: BEGIN; +step s1-upd-ins: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + +step s2-begin: BEGIN; +step s2-upd-del: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED AND prept.t2 = 0 THEN DELETE + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1; + +step s1-commit: COMMIT; +step s2-upd-del: <... 
completed> +step s2-commit: COMMIT; +step s2-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +100| 2 +(1 row) + + +starting permutation: s2-begin s2-upd-del s1-begin s1-upd-ins s2-commit s1-commit s2-result +step s2-begin: BEGIN; +step s2-upd-del: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED AND prept.t2 = 0 THEN DELETE + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1; + +step s1-begin: BEGIN; +step s1-upd-ins: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + +step s2-commit: COMMIT; +step s1-upd-ins: <... completed> +step s1-commit: COMMIT; +step s2-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +100| 0 +200| 0 +(2 rows) + + +starting permutation: s1-begin s1-upd-ins s2-begin s2-upd s1-commit s2-commit s2-result +step s1-begin: BEGIN; +step s1-upd-ins: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + +step s2-begin: BEGIN; +step s2-upd: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1; + +step s1-commit: COMMIT; +step s2-upd: <... completed> +step s2-commit: COMMIT; +step s2-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +100| 2 +200| 1 +(2 rows) + + +starting permutation: s2-begin s2-ins s1-begin s1-del s2-upd s2-result s2-commit s1-commit s2-result +step s2-begin: BEGIN; +step s2-ins: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + +step s1-begin: BEGIN; +step s1-del: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN DELETE; + +step s2-upd: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1; + +step s2-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +100| 1 +200| 1 +(2 rows) + +step s2-commit: COMMIT; +step s1-del: <... completed> +step s1-commit: COMMIT; +step s2-result: SELECT * FROM prept ORDER BY 1; +t1|t2 +--------------------------------------------------------------------- +(0 rows) + + +starting permutation: s1-begin s1-del-ins s2-begin s2-upd s1-result s1-ins s1-commit s2-upd s2-commit s2-result +step s1-begin: BEGIN; +step s1-del-ins: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN DELETE + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + +step s2-begin: BEGIN; +step s2-upd: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1; + +step s1-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +200| 0 +(1 row) + +step s1-ins: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + +step s1-commit: COMMIT; +step s2-upd: <... 
completed> +step s2-upd: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1; + +step s2-commit: COMMIT; +step s2-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +100| 2 +200| 2 +(2 rows) + diff --git a/src/test/regress/expected/isolation_merge_0.out b/src/test/regress/expected/isolation_merge_0.out new file mode 100644 index 000000000..3b43a25e6 --- /dev/null +++ b/src/test/regress/expected/isolation_merge_0.out @@ -0,0 +1,5 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-upd-ins s2-result s1-commit s2-result +setup failed: ERROR: MERGE is not supported on PG versions below 15 +CONTEXT: PL/pgSQL function inline_code_block line XX at RAISE diff --git a/src/test/regress/expected/isolation_merge_replicated.out b/src/test/regress/expected/isolation_merge_replicated.out new file mode 100644 index 000000000..e7e8b36ba --- /dev/null +++ b/src/test/regress/expected/isolation_merge_replicated.out @@ -0,0 +1,26 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-upd-ins s2-begin s2-update s1-commit s2-commit s1-result s2-result +step s1-begin: BEGIN; +step s1-upd-ins: MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); +step s2-begin: BEGIN; +step s2-update: UPDATE preps SET s2 = s2 + 1; +step s1-commit: COMMIT; +step s2-update: <... completed> +step s2-commit: COMMIT; +step s1-result: SELECT * FROM preps ORDER BY 1; + s1|s2 +--------------------------------------------------------------------- +100| 1 +200| 1 +(2 rows) + +step s2-result: SELECT * FROM prept ORDER BY 1; + t1|t2 +--------------------------------------------------------------------- +100| 1 +200| 0 +(2 rows) + diff --git a/src/test/regress/expected/isolation_merge_replicated_0.out b/src/test/regress/expected/isolation_merge_replicated_0.out new file mode 100644 index 000000000..51161dfb7 --- /dev/null +++ b/src/test/regress/expected/isolation_merge_replicated_0.out @@ -0,0 +1,5 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-upd-ins s2-begin s2-update s1-commit s2-commit s1-result s2-result +setup failed: ERROR: MERGE is not supported on PG versions below 15 +CONTEXT: PL/pgSQL function inline_code_block line XX at RAISE diff --git a/src/test/regress/expected/local_shard_execution.out b/src/test/regress/expected/local_shard_execution.out index 59f59d4b3..f77af42da 100644 --- a/src/test/regress/expected/local_shard_execution.out +++ b/src/test/regress/expected/local_shard_execution.out @@ -554,7 +554,7 @@ SELECT count(*) FROM second_distributed_table; 2 (1 row) -SELECT * FROM second_distributed_table; +SELECT * FROM second_distributed_table ORDER BY 1; key | value --------------------------------------------------------------------- 1 | 1 @@ -2031,6 +2031,7 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM ((SELECT f RESET client_min_messages; RESET citus.log_local_commands; \c - - - :master_port +SET search_path TO local_shard_execution; SET citus.next_shard_id TO 1480000; -- test both local and remote execution with custom type SET citus.shard_replication_factor TO 1; @@ -2353,7 +2354,9 @@ EXECUTE router_select_with_no_dist_key_filter('yes'); -- rest of the tests assume the table is empty TRUNCATE event_responses; CREATE OR REPLACE PROCEDURE register_for_event(p_event_id int, p_user_id int, p_choice invite_resp) -LANGUAGE plpgsql AS $fn$ 
+LANGUAGE plpgsql +SET search_path TO local_shard_execution +AS $fn$ BEGIN INSERT INTO event_responses VALUES (p_event_id, p_user_id, p_choice) ON CONFLICT (event_id, user_id) @@ -2386,6 +2389,7 @@ CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); \c - - - :worker_2_port +SET search_path TO local_shard_execution; CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); @@ -2409,16 +2413,16 @@ SET citus.log_local_commands TO ON; SET client_min_messages TO DEBUG2; CALL register_for_event(19, 1, 'yes'); DEBUG: not pushing down procedure to the same node -NOTICE: executing the command locally: INSERT INTO public.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (19, 1, 'yes'::public.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response -NOTICE: executing the command locally: SELECT count(*) AS count FROM public.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 19) -NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT NULL::integer AS event_id, NULL::integer AS user_id, NULL::public.invite_resp AS response WHERE false) event_responses(event_id, user_id, response) WHERE ((event_id OPERATOR(pg_catalog.=) 19) AND false) -NOTICE: executing the command locally: UPDATE public.event_responses_1480001 event_responses SET response = 'yes'::public.invite_resp WHERE (event_id OPERATOR(pg_catalog.=) 19) +NOTICE: executing the command locally: INSERT INTO local_shard_execution.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (19, 1, 'yes'::local_shard_execution.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response +NOTICE: executing the command locally: SELECT count(*) AS count FROM local_shard_execution.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 19) +NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT NULL::integer AS event_id, NULL::integer AS user_id, NULL::local_shard_execution.invite_resp AS response WHERE false) event_responses(event_id, user_id, response) WHERE ((event_id OPERATOR(pg_catalog.=) 19) AND false) +NOTICE: executing the command locally: UPDATE local_shard_execution.event_responses_1480001 event_responses SET response = 'yes'::local_shard_execution.invite_resp WHERE (event_id OPERATOR(pg_catalog.=) 19) -- should be fine even if no parameters exists in the query SELECT count(*) FROM event_responses WHERE event_id = 16; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 16 -NOTICE: executing the command locally: SELECT count(*) AS count FROM public.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 16) +NOTICE: executing the command locally: SELECT count(*) AS count FROM local_shard_execution.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 16) count --------------------------------------------------------------------- 1 @@ -2428,7 +2432,7 @@ SELECT count(*) FROM event_responses WHERE event_id = 16; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 16 -NOTICE: executing the command locally: SELECT count(*) AS count FROM public.event_responses_1480001 event_responses WHERE (event_id 
OPERATOR(pg_catalog.=) 16) +NOTICE: executing the command locally: SELECT count(*) AS count FROM local_shard_execution.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 16) count --------------------------------------------------------------------- 1 @@ -2438,13 +2442,13 @@ UPDATE event_responses SET response = 'no' WHERE event_id = 16; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 16 -NOTICE: executing the command locally: UPDATE public.event_responses_1480001 event_responses SET response = 'no'::public.invite_resp WHERE (event_id OPERATOR(pg_catalog.=) 16) +NOTICE: executing the command locally: UPDATE local_shard_execution.event_responses_1480001 event_responses SET response = 'no'::local_shard_execution.invite_resp WHERE (event_id OPERATOR(pg_catalog.=) 16) INSERT INTO event_responses VALUES (16, 666, 'maybe') ON CONFLICT (event_id, user_id) DO UPDATE SET response = EXCLUDED.response RETURNING *; DEBUG: Creating router plan DEBUG: query has a single distribution column value: 16 -NOTICE: executing the command locally: INSERT INTO public.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (16, 666, 'maybe'::public.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response RETURNING citus_table_alias.event_id, citus_table_alias.user_id, citus_table_alias.response +NOTICE: executing the command locally: INSERT INTO local_shard_execution.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (16, 666, 'maybe'::local_shard_execution.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response RETURNING citus_table_alias.event_id, citus_table_alias.user_id, citus_table_alias.response event_id | user_id | response --------------------------------------------------------------------- 16 | 666 | maybe @@ -2455,7 +2459,7 @@ INSERT INTO event_responses VALUES (16, 666, 'maybe'), (17, 777, 'no') ON CONFLICT (event_id, user_id) DO UPDATE SET response = EXCLUDED.response RETURNING *; DEBUG: Creating router plan -NOTICE: executing the command locally: INSERT INTO public.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (16,666,'maybe'::public.invite_resp), (17,777,'no'::public.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response RETURNING citus_table_alias.event_id, citus_table_alias.user_id, citus_table_alias.response +NOTICE: executing the command locally: INSERT INTO local_shard_execution.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (16,666,'maybe'::local_shard_execution.invite_resp), (17,777,'no'::local_shard_execution.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response RETURNING citus_table_alias.event_id, citus_table_alias.user_id, citus_table_alias.response event_id | user_id | response --------------------------------------------------------------------- 16 | 666 | maybe @@ -2537,7 +2541,7 @@ SELECT * FROM event_responses_no_pkey WHERE event_id = 2; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 -NOTICE: executing the command locally: SELECT event_id, user_id, response FROM public.event_responses_no_pkey_1480007 event_responses_no_pkey WHERE (event_id OPERATOR(pg_catalog.=) 2) +NOTICE: executing the command locally: SELECT event_id, 
user_id, response FROM local_shard_execution.event_responses_no_pkey_1480007 event_responses_no_pkey WHERE (event_id OPERATOR(pg_catalog.=) 2) event_id | user_id | response --------------------------------------------------------------------- (0 rows) @@ -2546,7 +2550,7 @@ SELECT * FROM event_responses_no_pkey WHERE event_id = 1; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 -NOTICE: issuing SELECT event_id, user_id, response FROM public.event_responses_no_pkey_1480004 event_responses_no_pkey WHERE (event_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT event_id, user_id, response FROM local_shard_execution.event_responses_no_pkey_1480004 event_responses_no_pkey WHERE (event_id OPERATOR(pg_catalog.=) 1) event_id | user_id | response --------------------------------------------------------------------- (0 rows) @@ -3258,6 +3262,7 @@ SELECT recover_prepared_transactions(); (1 row) \c - - - :master_port +SET search_path TO local_shard_execution; -- verify the local_hostname guc is used for local executions that should connect to the -- local host ALTER SYSTEM SET citus.local_hostname TO 'foobar'; diff --git a/src/test/regress/expected/local_shard_execution_0.out b/src/test/regress/expected/local_shard_execution_0.out index 2d0d7f089..5350728aa 100644 --- a/src/test/regress/expected/local_shard_execution_0.out +++ b/src/test/regress/expected/local_shard_execution_0.out @@ -554,7 +554,7 @@ SELECT count(*) FROM second_distributed_table; 2 (1 row) -SELECT * FROM second_distributed_table; +SELECT * FROM second_distributed_table ORDER BY 1; key | value --------------------------------------------------------------------- 1 | 1 @@ -2031,6 +2031,7 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM ((SELECT f RESET client_min_messages; RESET citus.log_local_commands; \c - - - :master_port +SET search_path TO local_shard_execution; SET citus.next_shard_id TO 1480000; -- test both local and remote execution with custom type SET citus.shard_replication_factor TO 1; @@ -2353,7 +2354,9 @@ EXECUTE router_select_with_no_dist_key_filter('yes'); -- rest of the tests assume the table is empty TRUNCATE event_responses; CREATE OR REPLACE PROCEDURE register_for_event(p_event_id int, p_user_id int, p_choice invite_resp) -LANGUAGE plpgsql AS $fn$ +LANGUAGE plpgsql +SET search_path TO local_shard_execution +AS $fn$ BEGIN INSERT INTO event_responses VALUES (p_event_id, p_user_id, p_choice) ON CONFLICT (event_id, user_id) @@ -2386,6 +2389,7 @@ CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); \c - - - :worker_2_port +SET search_path TO local_shard_execution; CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); @@ -2409,16 +2413,16 @@ SET citus.log_local_commands TO ON; SET client_min_messages TO DEBUG2; CALL register_for_event(19, 1, 'yes'); DEBUG: not pushing down procedure to the same node -NOTICE: executing the command locally: INSERT INTO public.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (19, 1, 'yes'::public.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response -NOTICE: executing the command locally: SELECT count(*) AS count FROM public.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 19) -NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT 
NULL::integer AS event_id, NULL::integer AS user_id, NULL::public.invite_resp AS response WHERE false) event_responses(event_id, user_id, response) WHERE ((event_id OPERATOR(pg_catalog.=) 19) AND false) -NOTICE: executing the command locally: UPDATE public.event_responses_1480001 event_responses SET response = 'yes'::public.invite_resp WHERE (event_id OPERATOR(pg_catalog.=) 19) +NOTICE: executing the command locally: INSERT INTO local_shard_execution.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (19, 1, 'yes'::local_shard_execution.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response +NOTICE: executing the command locally: SELECT count(*) AS count FROM local_shard_execution.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 19) +NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT NULL::integer AS event_id, NULL::integer AS user_id, NULL::local_shard_execution.invite_resp AS response WHERE false) event_responses(event_id, user_id, response) WHERE ((event_id OPERATOR(pg_catalog.=) 19) AND false) +NOTICE: executing the command locally: UPDATE local_shard_execution.event_responses_1480001 event_responses SET response = 'yes'::local_shard_execution.invite_resp WHERE (event_id OPERATOR(pg_catalog.=) 19) -- should be fine even if no parameters exists in the query SELECT count(*) FROM event_responses WHERE event_id = 16; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 16 -NOTICE: executing the command locally: SELECT count(*) AS count FROM public.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 16) +NOTICE: executing the command locally: SELECT count(*) AS count FROM local_shard_execution.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 16) count --------------------------------------------------------------------- 1 @@ -2428,7 +2432,7 @@ SELECT count(*) FROM event_responses WHERE event_id = 16; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 16 -NOTICE: executing the command locally: SELECT count(*) AS count FROM public.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 16) +NOTICE: executing the command locally: SELECT count(*) AS count FROM local_shard_execution.event_responses_1480001 event_responses WHERE (event_id OPERATOR(pg_catalog.=) 16) count --------------------------------------------------------------------- 1 @@ -2438,13 +2442,13 @@ UPDATE event_responses SET response = 'no' WHERE event_id = 16; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 16 -NOTICE: executing the command locally: UPDATE public.event_responses_1480001 event_responses SET response = 'no'::public.invite_resp WHERE (event_id OPERATOR(pg_catalog.=) 16) +NOTICE: executing the command locally: UPDATE local_shard_execution.event_responses_1480001 event_responses SET response = 'no'::local_shard_execution.invite_resp WHERE (event_id OPERATOR(pg_catalog.=) 16) INSERT INTO event_responses VALUES (16, 666, 'maybe') ON CONFLICT (event_id, user_id) DO UPDATE SET response = EXCLUDED.response RETURNING *; DEBUG: Creating router plan DEBUG: query has a single distribution column value: 16 -NOTICE: executing the command locally: INSERT INTO 
public.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (16, 666, 'maybe'::public.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response RETURNING citus_table_alias.event_id, citus_table_alias.user_id, citus_table_alias.response +NOTICE: executing the command locally: INSERT INTO local_shard_execution.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (16, 666, 'maybe'::local_shard_execution.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response RETURNING citus_table_alias.event_id, citus_table_alias.user_id, citus_table_alias.response event_id | user_id | response --------------------------------------------------------------------- 16 | 666 | maybe @@ -2455,7 +2459,7 @@ INSERT INTO event_responses VALUES (16, 666, 'maybe'), (17, 777, 'no') ON CONFLICT (event_id, user_id) DO UPDATE SET response = EXCLUDED.response RETURNING *; DEBUG: Creating router plan -NOTICE: executing the command locally: INSERT INTO public.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (16,666,'maybe'::public.invite_resp), (17,777,'no'::public.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response RETURNING citus_table_alias.event_id, citus_table_alias.user_id, citus_table_alias.response +NOTICE: executing the command locally: INSERT INTO local_shard_execution.event_responses_1480001 AS citus_table_alias (event_id, user_id, response) VALUES (16,666,'maybe'::local_shard_execution.invite_resp), (17,777,'no'::local_shard_execution.invite_resp) ON CONFLICT(event_id, user_id) DO UPDATE SET response = excluded.response RETURNING citus_table_alias.event_id, citus_table_alias.user_id, citus_table_alias.response event_id | user_id | response --------------------------------------------------------------------- 16 | 666 | maybe @@ -2537,7 +2541,7 @@ SELECT * FROM event_responses_no_pkey WHERE event_id = 2; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 -NOTICE: executing the command locally: SELECT event_id, user_id, response FROM public.event_responses_no_pkey_1480007 event_responses_no_pkey WHERE (event_id OPERATOR(pg_catalog.=) 2) +NOTICE: executing the command locally: SELECT event_id, user_id, response FROM local_shard_execution.event_responses_no_pkey_1480007 event_responses_no_pkey WHERE (event_id OPERATOR(pg_catalog.=) 2) event_id | user_id | response --------------------------------------------------------------------- (0 rows) @@ -2546,7 +2550,7 @@ SELECT * FROM event_responses_no_pkey WHERE event_id = 1; DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 -NOTICE: issuing SELECT event_id, user_id, response FROM public.event_responses_no_pkey_1480004 event_responses_no_pkey WHERE (event_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT event_id, user_id, response FROM local_shard_execution.event_responses_no_pkey_1480004 event_responses_no_pkey WHERE (event_id OPERATOR(pg_catalog.=) 1) event_id | user_id | response --------------------------------------------------------------------- (0 rows) @@ -3258,6 +3262,7 @@ SELECT recover_prepared_transactions(); (1 row) \c - - - :master_port +SET search_path TO local_shard_execution; -- verify the local_hostname guc is used for local executions that should connect to the -- local host ALTER SYSTEM SET 
citus.local_hostname TO 'foobar'; diff --git a/src/test/regress/expected/local_shard_execution_replicated.out b/src/test/regress/expected/local_shard_execution_replicated.out index d12aa937d..7d36a5559 100644 --- a/src/test/regress/expected/local_shard_execution_replicated.out +++ b/src/test/regress/expected/local_shard_execution_replicated.out @@ -503,7 +503,7 @@ SELECT count(*) FROM second_distributed_table; 2 (1 row) -SELECT * FROM second_distributed_table; +SELECT * FROM second_distributed_table ORDER BY 1; key | value --------------------------------------------------------------------- 1 | 1 diff --git a/src/test/regress/expected/local_shard_execution_replicated_0.out b/src/test/regress/expected/local_shard_execution_replicated_0.out index 7a0a77ece..759d842fd 100644 --- a/src/test/regress/expected/local_shard_execution_replicated_0.out +++ b/src/test/regress/expected/local_shard_execution_replicated_0.out @@ -503,7 +503,7 @@ SELECT count(*) FROM second_distributed_table; 2 (1 row) -SELECT * FROM second_distributed_table; +SELECT * FROM second_distributed_table ORDER BY 1; key | value --------------------------------------------------------------------- 1 | 1 diff --git a/src/test/regress/expected/logical_replication.out b/src/test/regress/expected/logical_replication.out index 0b2585bfb..79108dd11 100644 --- a/src/test/regress/expected/logical_replication.out +++ b/src/test/regress/expected/logical_replication.out @@ -25,7 +25,9 @@ NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipp -- This allows us to test the cleanup logic at the start of the shard move. \c - - - :worker_1_port SET search_path TO logical_replication; +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION citus_shard_move_publication_:postgres_oid FOR TABLE dist_6830000; +RESET citus.enable_ddl_propagation; \c - - - :master_port SET search_path TO logical_replication; CREATE TABLE dist_6830000( @@ -155,6 +157,13 @@ SELECT count(*) from dist; 100 (1 row) +DROP PUBLICATION citus_shard_move_publication_:postgres_oid; +SELECT pg_drop_replication_slot('citus_shard_move_slot_' || :postgres_oid); + pg_drop_replication_slot +--------------------------------------------------------------------- + +(1 row) + \c - - - :worker_2_port SET search_path TO logical_replication; SELECT count(*) from pg_subscription; @@ -188,3 +197,9 @@ ALTER SUBSCRIPTION citus_shard_move_subscription_:postgres_oid DISABLE; ALTER SUBSCRIPTION citus_shard_move_subscription_:postgres_oid SET (slot_name = NONE); DROP SUBSCRIPTION citus_shard_move_subscription_:postgres_oid; DROP SCHEMA logical_replication CASCADE; +SELECT public.wait_for_resource_cleanup(); + wait_for_resource_cleanup +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 6fc472b70..412667037 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -17,7 +17,9 @@ CREATE SCHEMA merge_schema; SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; -SET citus.explain_all_tasks to true; +SET citus.explain_all_tasks TO true; +SET citus.shard_replication_factor TO 1; +SET citus.max_adaptive_executor_pool_size TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ?column? 
@@ -214,9 +216,45 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +-- Updates one of the rows with customer_id = 30002 +SELECT * from target t WHERE t.customer_id = 30002; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30002 | 103 | AX | -1 | Sun Jan 17 19:53:00 2021 +(1 row) + +-- Turn on notice to print tasks sent to nodes +SET citus.log_remote_commands to true; MERGE INTO target t USING source s - ON (t.customer_id = s.customer_id) + ON (t.customer_id = s.customer_id) AND t.customer_id = 30002 + WHEN MATCHED AND t.order_center = 'XX' THEN + DELETE + WHEN MATCHED THEN + UPDATE SET -- Existing customer, update the order count and last_order_id + order_count = t.order_count + 1, + last_order_id = s.order_id + WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.target_xxxxxxx t USING merge_schema.source_xxxxxxx s ON ((t.customer_id OPERATOR(pg_catalog.=) s.customer_id) AND (t.customer_id OPERATOR(pg_catalog.=) 30002)) WHEN MATCHED AND ((t.order_center COLLATE "default") OPERATOR(pg_catalog.=) 'XX'::text) THEN DELETE WHEN MATCHED THEN UPDATE SET last_order_id = s.order_id, order_count = (t.order_count OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * from target t WHERE t.customer_id = 30002; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30002 | 103 | AX | 0 | Sun Jan 17 19:53:00 2021 +(1 row) + +-- Deletes one of the rows with customer_id = 30004 +SELECT * from target t WHERE t.customer_id = 30004; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30004 | 99 | XX | -1 | Fri Sep 11 03:23:00 2020 +(1 row) + +MERGE INTO target t + USING source s + ON (t.customer_id = s.customer_id) AND t.customer_id = 30004 WHEN MATCHED AND t.order_center = 'XX' THEN DELETE WHEN MATCHED THEN @@ -226,7 +264,34 @@ MERGE INTO target t WHEN NOT MATCHED THEN -- New entry, record it. 
INSERT (customer_id, last_order_id, order_center, order_count, last_order) VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time); -ERROR: MERGE command is not supported on distributed/reference tables yet +SELECT * from target t WHERE t.customer_id = 30004; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- +(0 rows) + +-- Updating distribution column is allowed if the operation is a no-op +SELECT * from target t WHERE t.customer_id = 30000; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022 +(1 row) + +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = 30000; +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = t.customer_id; +SELECT * from target t WHERE t.customer_id = 30000; + customer_id | last_order_id | order_center | order_count | last_order +--------------------------------------------------------------------- + 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022 +(1 row) + -- -- Test MERGE with CTE as source -- @@ -269,7 +334,6 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (pg_res.id, pg_res.val); --- Two rows with id 2 and val incremented, id 3, and id 1 is deleted SELECT * FROM t1 order by id; id | val --------------------------------------------------------------------- @@ -386,18 +450,61 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 1 | 0 + 2 | 0 + 5 | 0 +(3 rows) + +SET citus.log_remote_commands to true; WITH s1_res AS ( SELECT * FROM s1 ) MERGE INTO t1 - USING s1_res ON (s1_res.id = t1.id) + USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6 WHEN MATCHED AND s1_res.val = 0 THEN DELETE WHEN MATCHED THEN UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val 
= (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +-- Other than id 6 everything else is a NO match, and should appear in target +SELECT * FROM t1 order by 1, 2; + id | val +--------------------------------------------------------------------- + 1 | 0 + 1 | 0 + 2 | 0 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(8 rows) + -- -- Test with multiple join conditions -- @@ -553,16 +660,38 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +SELECT * FROM t2 ORDER BY 1; + id | val | src +--------------------------------------------------------------------- + 1 | 0 | target + 2 | 0 | target + 3 | 1 | match + 4 | 0 | match +(4 rows) + +SET citus.log_remote_commands to true; MERGE INTO t2 USING s2 -ON t2.id = s2.id AND t2.src = s2.src +ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4 WHEN MATCHED AND t2.val = 1 THEN UPDATE SET val = s2.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN - INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src); -ERROR: MERGE command is not supported on distributed/reference tables yet + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.t2_xxxxxxx t2 USING merge_schema.s2_xxxxxxx s2 ON ((t2.id OPERATOR(pg_catalog.=) s2.id) AND (t2.src OPERATOR(pg_catalog.=) s2.src) AND (t2.id OPERATOR(pg_catalog.=) 4)) WHEN MATCHED AND (t2.val OPERATOR(pg_catalog.=) 1) THEN UPDATE SET val = (s2.val OPERATOR(pg_catalog.+) 10) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +-- Row with id = 4 is a match for delete clause, row should be deleted +-- Row with id = 3 is a NO match, row from source will be inserted +SELECT * FROM t2 ORDER BY 1; + id | val | src +--------------------------------------------------------------------- + 1 | 0 | target + 2 | 0 | target + 3 | 1 | match +(3 rows) + -- -- With sub-query as the MERGE source 
-- @@ -943,7 +1072,8 @@ WHEN MATCHED THEN UPDATE SET value = vl_source.value, id = vl_target.id + 1 WHEN NOT MATCHED THEN INSERT VALUES(vl_source.ID, vl_source.value); -DEBUG: +DEBUG: Creating MERGE router plan +DEBUG: RESET client_min_messages; SELECT * INTO vl_local FROM vl_target ORDER BY 1 ; -- Should be equal @@ -996,7 +1126,8 @@ WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT VALUES(rs_source.id); -DEBUG: +DEBUG: Creating MERGE router plan +DEBUG: RESET client_min_messages; SELECT * INTO rs_local FROM rs_target ORDER BY 1 ; -- Should be equal @@ -1094,7 +1225,8 @@ END; $$ language plpgsql volatile; CREATE TABLE fn_target(id int, data varchar); MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -1110,29 +1242,23 @@ SELECT citus_add_local_table_to_metadata('fn_target'); (1 row) -SELECT create_distributed_table('dist_table', 'id'); -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. -HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.dist_table$$) - create_distributed_table +SELECT citus_add_local_table_to_metadata('dist_table'); + citus_add_local_table_to_metadata --------------------------------------------------------------------- (1 row) SET client_min_messages TO DEBUG1; MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT VALUES(fn_source.id, fn_source.source); -DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT id, source FROM merge_schema.f_dist() f(id integer, source character varying) -DEBUG: -DEBUG: Plan XXX query after replacing subqueries and CTEs: MERGE INTO merge_schema.fn_target USING (SELECT intermediate_result.id, intermediate_result.source FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, source character varying)) fn_source ON (fn_source.id OPERATOR(pg_catalog.=) fn_target.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT (id, data) VALUES (fn_source.id, fn_source.source) -DEBUG: +DEBUG: Creating MERGE router plan +DEBUG: RESET client_min_messages; SELECT * INTO fn_local FROM fn_target ORDER BY 1 ; -- Should be equal @@ -1204,7 +1330,8 @@ MERGE INTO ft_target DELETE WHEN NOT MATCHED THEN INSERT (id, user_val) VALUES (foreign_table.id, foreign_table.user_val); -DEBUG: +DEBUG: Creating MERGE router plan +DEBUG: RESET client_min_messages; SELECT * FROM ft_target; id | user_val @@ -1213,9 +1340,1181 @@ SELECT * FROM ft_target; 3 | source (2 rows) +-- +-- complex joins on the source side +-- +-- source(join of two relations) relation is an unaliased join +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES 
(3, 'target', 0); +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET tid = sid2, src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test the same scenarios with distributed tables +SELECT create_distributed_table('target_cj', 'tid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.target_cj$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj1', 'sid1'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj2', 'sid2'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj2$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t USING (merge_schema.source_cj1_xxxxxxx s1 JOIN merge_schema.source_cj2_xxxxxxx s2 ON ((s1.sid1 OPERATOR(pg_catalog.=) s2.sid2))) ON ((t.tid OPERATOR(pg_catalog.=) s1.sid1) AND (t.tid OPERATOR(pg_catalog.=) 2)) WHEN MATCHED THEN UPDATE SET src = s2.src2 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- sub-query as a source +BEGIN; +MERGE INTO target_cj t +USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub +ON t.tid = sub.sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = sub.src1, val = val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 10 + 2 | source-1 | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test self-join +BEGIN; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 0 + 2 | target | 0 + 3 | target | 0 +(4 rows) + +set citus.log_remote_commands to true; +MERGE INTO target_cj t1 +USING (SELECT * FROM target_cj) sub +ON t1.tid = sub.tid AND t1.tid = 3 +WHEN MATCHED THEN + UPDATE SET src = sub.src, val = sub.val + 100 +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t1 USING (SELECT target_cj.tid, target_cj.src, target_cj.val FROM merge_schema.target_cj_xxxxxxx target_cj) sub ON ((t1.tid OPERATOR(pg_catalog.=) sub.tid) AND (t1.tid OPERATOR(pg_catalog.=) 3)) WHEN MATCHED THEN UPDATE SET src = sub.src, val = (sub.val OPERATOR(pg_catalog.+) 100) WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +set citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 0 + 2 | target | 0 + 3 | target | 100 +(4 rows) + +ROLLBACK; +-- Test PREPARE +PREPARE foo(int) AS +MERGE INTO 
target_cj target +USING (SELECT * FROM source_cj1) sub +ON target.tid = sub.sid1 AND target.tid = $1 +WHEN MATCHED THEN + UPDATE SET val = sub.val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 0 + 2 | target | 0 + 3 | target | 0 +(4 rows) + +BEGIN; +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 10 + 2 | target | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +SET citus.log_remote_commands to true; +SET client_min_messages TO DEBUG1; +EXECUTE foo(2); +DEBUG: +DEBUG: +DEBUG: +DEBUG: +DEBUG: Creating MERGE router plan +DEBUG: +DEBUG: Creating MERGE router plan +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +RESET client_min_messages; +EXECUTE foo(2); +NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | target | 10 + 2 | target | 10 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test distributed tables, must be co-located and joined on distribution column. +-- +-- We create two sets of source and target tables, one set is Postgres and the other +-- is Citus distributed. Run the _exact_ MERGE SQL on both the sets and compare the +-- final results of target tables of Postgres and Citus, the result should match. +-- This is repeated for various MERGE SQL combinations +-- +CREATE TABLE pg_target(id int, val varchar); +CREATE TABLE pg_source(id int, val varchar); +CREATE TABLE citus_target(id int, val varchar); +CREATE TABLE citus_source(id int, val varchar); +-- Half of the source rows do not match +INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i; +INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i; +SELECT create_distributed_table('citus_target', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_target$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('citus_source', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_source$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- +-- This routine compares the target tables of Postgres and Citus and +-- returns true if they match, false if the results do not match. +-- +CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_target + FULL OUTER JOIN citus_target + USING (id, val) + WHERE pg_target.id IS NULL + OR citus_target.id IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; +-- Make sure we start with exact data in Postgres and Citus +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +-- Run the MERGE on both Postgres and Citus, and compare the final target tables +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT 
MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- ON clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by 
Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- ON clause filter on target +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- NOT MATCHED clause filter on 
source +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- Test constant filter in ON clause to check if shards are pruned +-- with restriction information +-- +-- +-- Though constant filter is present, this won't prune shards as +-- NOT MATCHED clause is present +-- +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET 
val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- This will prune shards with restriction information as NOT MATCHED is void +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by Merge'::text) COLLATE "default") WHEN NOT MATCHED THEN DO NOTHING +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test CTE with distributed tables +CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400; +WARNING: "view pg_source_view" has dependency to "table pg_source" that is not in Citus' metadata +DETAIL: "view pg_source_view" will be created only locally +HINT: Distribute "table pg_source" first to distribute "view pg_source_view" +CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400; +BEGIN; +SEt citus.log_remote_commands to true; +WITH cte AS ( + SELECT * FROM pg_source_view +) +MERGE INTO pg_target t 
+USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +WITH cte AS ( + SELECT * FROM citus_source_view +) +MERGE INTO citus_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by CTE'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT 
(id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test sub-query with distributed tables +BEGIN; +SEt citus.log_remote_commands to true; +MERGE INTO pg_target t +USING (SELECT * FROM pg_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +MERGE INTO citus_target t +USING (SELECT * FROM citus_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = (((t.val COLLATE "default") OPERATOR(pg_catalog.||) 'Updated by subquery'::text) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) 
WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test PREPARE +PREPARE pg_prep(int) AS +MERGE INTO pg_target +USING (SELECT * FROM pg_source) sub +ON pg_target.id = sub.id AND pg_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + INSERT VALUES (sub.id, sub.val); +PREPARE citus_prep(int) AS +MERGE INTO citus_target +USING (SELECT * FROM citus_source) sub +ON citus_target.id = sub.id AND citus_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + INSERT VALUES (sub.id, sub.val); +BEGIN; +SELECT * FROM pg_target WHERE id = 500; -- before merge + id | val +--------------------------------------------------------------------- + 500 | target +(1 row) + +SELECT count(*) FROM pg_target; -- before merge + count +--------------------------------------------------------------------- + 251 +(1 row) + +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- non-cached + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- cached + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +SELECT count(*) FROM pg_target; -- cached + count +--------------------------------------------------------------------- + 3245 +(1 row) + +SELECT * FROM citus_target WHERE id = 500; -- before merge + id | val +--------------------------------------------------------------------- + 500 | target +(1 row) + +SELECT count(*) FROM citus_target; -- before merge + count +--------------------------------------------------------------------- + 251 +(1 row) + +SET citus.log_remote_commands to true; +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id 
OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SELECT * FROM citus_target WHERE id = 500; -- non-cached +NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); 
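--
-- A condensed sketch of the prepared-statement pattern exercised in this block;
-- it is illustrative only and not part of the recorded expected output
-- (merge_sketch is a hypothetical name, citus_target/citus_source are the tables
-- created above). As the NOTICE lines before and after this point show, the
-- parameter reaches the workers as $1 inside the remote MERGE rather than as a
-- folded-in literal, and because a WHEN NOT MATCHED ... INSERT arm is present
-- the command is sent to every shard instead of being pruned to one.
--
-- PREPARE merge_sketch(int) AS
-- MERGE INTO citus_target
-- USING (SELECT * FROM citus_source) sub
-- ON citus_target.id = sub.id AND citus_target.id = $1
-- WHEN MATCHED THEN
--     UPDATE SET val = 'Updated by prepare using ' || sub.val
-- WHEN NOT MATCHED THEN
--     INSERT VALUES (sub.id, sub.val);
-- EXECUTE merge_sketch(500);
--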
+NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id 
OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +EXECUTE citus_prep(500); +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) 
(sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = (('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val COLLATE "default")) COLLATE "default") WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val) +DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx +SET citus.log_remote_commands to false; +SELECT * FROM citus_target WHERE id = 500; -- cached + id | val +--------------------------------------------------------------------- + 500 | Updated by prepare using source +(1 row) + +SELECT count(*) FROM citus_target; -- cached + count +--------------------------------------------------------------------- + 3245 +(1 row) + +SELECT compare_tables(); + compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- Test partitions + distributed tables +CREATE TABLE pg_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE citus_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part6 PARTITION OF citus_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE pg_pa_source (sid integer, delta float); +CREATE TABLE citus_pa_source (sid integer, delta float); +-- insert many rows to the source table +INSERT INTO pg_pa_source 
SELECT id, id * 10 FROM generate_series(1,14) AS id; +INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +-- insert a few rows in the target table (odd numbered tid) +INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +SELECT create_distributed_table('citus_pa_target', 'tid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part5$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part6$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part7$$) +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part8$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('citus_pa_source', 'sid'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_pa_source$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_pa_target + FULL OUTER JOIN citus_pa_target + USING (tid, balance, val) + WHERE pg_pa_target.tid IS NULL + OR citus_pa_target.tid IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; +-- try simple MERGE +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +SELECT pa_compare_tables(); + pa_compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- same with a constant qual +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); +SELECT pa_compare_tables(); + pa_compare_tables +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +CREATE TABLE source_json( id integer, z int, d jsonb); +CREATE TABLE target_json( id integer, z int, d jsonb); +INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i; +SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_json$$) + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +-- single shard query, since source_json is filtered and Postgres is smart enough to push down +-- the filter to target_json as well +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING (SELECT * FROM source_json WHERE id = 1) sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 1 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 1 | 5 | +(1 row) + +-- zero shard query as filters do not match +--SELECT public.coordinator_plan($Q$ +--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +--USING (SELECT * FROM source_json WHERE id = 1) sdn +--ON sda.id = sdn.id AND sda.id = 2 +--WHEN NOT matched THEN +-- INSERT (id, z) VALUES (sdn.id, 5); +--$Q$); +--SELECT * FROM target_json ORDER BY 1; +-- join for source_json is happening at a different place +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z) +ON sda.id = s1.id AND s1.id = s2.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (s2.id, 5); +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 4 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 0 | 5 | + 1 | 5 | + 2 | 5 | + 3 | 5 | + 4 | 5 | + 5 | 5 | +(6 rows) + +-- update JSON column +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET d = '{"a" : 5}'; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1) + Task Count: 4 +(2 rows) + +SELECT * FROM target_json ORDER BY 1; + id | z | d +--------------------------------------------------------------------- + 0 | 5 | {"a": 5} + 1 | 5 | {"a": 5} + 2 | 5 | {"a": 5} + 3 | 5 | {"a": 5} + 4 | 5 | {"a": 5} + 5 | 5 | {"a": 5} +(6 rows) + +CREATE FUNCTION immutable_hash(int) RETURNS int +AS 'SELECT hashtext( ($1 + $1)::text);' +LANGUAGE SQL +IMMUTABLE +RETURNS NULL ON NULL INPUT; +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET z = immutable_hash(sdn.z); +-- Test bigserial +CREATE TABLE source_serial (id integer, z int, d bigserial); +CREATE TABLE target_serial (id integer, z int, d bigserial); +INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i; +SELECT create_distributed_table('source_serial', 'id'), + create_distributed_table('target_serial', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk.
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_serial$$) + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +MERGE INTO target_serial sda +USING source_serial sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (id, z); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +SELECT count(*) from source_serial; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(*) from target_serial; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(distinct d) from source_serial; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(distinct d) from target_serial; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Test set operations +CREATE TABLE target_set(t1 int, t2 int); +CREATE TABLE source_set(s1 int, s2 int); +SELECT create_distributed_table('target_set', 't1'), + create_distributed_table('source_set', 's1'); + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +INSERT INTO target_set VALUES(1, 0); +INSERT INTO source_set VALUES(1, 1); +INSERT INTO source_set VALUES(2, 2); +MERGE INTO target_set +USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 100 +WHEN NOT MATCHED THEN + INSERT VALUES(foo.s1); +SELECT * FROM target_set ORDER BY 1, 2; + t1 | t2 +--------------------------------------------------------------------- + 1 | 100 + 2 | +(2 rows) + -- -- Error and Unsupported scenarios -- +MERGE INTO target_set +USING source_set AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET ctid = '(0,100)'; +ERROR: cannot assign to system column "ctid" +MERGE INTO target_set +USING (SELECT s1,s2 FROM source_set UNION SELECT s2,s1 FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +MERGE INTO target_set +USING (SELECT 2 as s3, source_set.* FROM (SELECT * FROM source_set LIMIT 1) as foo LEFT JOIN source_set USING( s1)) AS foo +ON target_set.t1 = foo.s1 +WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 +WHEN NOT MATCHED THEN INSERT VALUES(s1, s3); +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a subquery references a column from another query +-- modifying CTE not supported +EXPLAIN +WITH cte_1 AS (DELETE FROM target_json) +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +-- Grouping sets not supported +MERGE INTO citus_target t +USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND 
t.id < 350 THEN + DELETE; +ERROR: cannot push down this subquery +DETAIL: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP +WITH subq AS +( +SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val) +) +MERGE INTO citus_target t +USING subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; +ERROR: cannot push down this subquery +DETAIL: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP +-- try inserting unmatched distribution column value +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT DEFAULT VALUES; +ERROR: cannot perform MERGE INSERT with DEFAULTS +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT VALUES(10000); +ERROR: MERGE INSERT must refer a source column for distribution column +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(1000); +ERROR: MERGE INSERT must refer a source column for distribution column +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(s.val); +ERROR: MERGE INSERT must use the source table distribution column value +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (val) VALUES(s.val); +ERROR: MERGE INSERT must have distribution column as value +-- try updating the distribution key column +BEGIN; +MERGE INTO target_cj t + USING source_cj1 s + ON t.tid = s.sid1 AND t.tid = 2 + WHEN MATCHED THEN + UPDATE SET tid = tid + 9, src = src || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid1, 'inserted by merge', val1); +ERROR: updating the distribution column is not allowed in MERGE actions +ROLLBACK; -- Foreign table as target MERGE INTO foreign_table USING ft_target ON (foreign_table.id = ft_target.id) @@ -1274,7 +2573,72 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet +-- Now both s1 and t1 are distributed tables +SELECT undistribute_table('t1'); +NOTICE: creating a new table for merge_schema.t1 +NOTICE: moving the data of merge_schema.t1 +NOTICE: dropping the old merge_schema.t1 +NOTICE: renaming the new table to merge_schema.t1 + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t1', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- We have a potential pitfall where a function can be invoked in +-- the MERGE conditions which can insert/update to a random shard +CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN +LANGUAGE PLPGSQL AS +$$ +BEGIN + INSERT INTO t1 VALUES (100, 100); + RETURN TRUE; +END; +$$; +-- Test functions executing in MERGE statement. 
This is to prevent the functions from +-- executing arbitrary SQL, which may run on a remote node or modify the target +-- relation, leading to unexpected/surprising results. +MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON + t1.id = s1.id AND s1.id = 2 + WHEN matched THEN + UPDATE SET id = s1.id, val = random(); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +-- Test STABLE function +CREATE FUNCTION add_s(integer, integer) RETURNS integer +AS 'select $1 + $2;' +LANGUAGE SQL +STABLE RETURNS NULL ON NULL INPUT; +MERGE INTO t1 +USING s1 ON t1.id = s1.id +WHEN NOT MATCHED THEN + INSERT VALUES(s1.id, add_s(s1.val, 2)); +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +-- Test preventing "ON" join condition from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET val = t1.val + s1.val; +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +ROLLBACK; +-- Test preventing WHEN clause(s) from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 +WHEN MATCHED AND (merge_when_and_write()) THEN + UPDATE SET val = t1.val + s1.val; +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +ROLLBACK; -- Joining on partition columns with sub-query MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.val = t1.id) -- sub.val is not a distribution column @@ -1284,7 +2648,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- Joining on partition columns with CTE WITH s1_res AS ( SELECT * FROM s1 @@ -1297,7 +2661,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- Constant Join condition WITH s1_res AS ( SELECT * FROM s1 @@ -1310,7 +2674,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- With a single WHEN clause, which causes a non-left join WITH s1_res AS ( SELECT * FROM s1 @@ -1319,7 +2683,7 @@ WITH s1_res AS ( WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- -- Reference tables -- @@ -1371,7 +2735,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- -- Postgres + Citus-Distributed table -- @@ -1413,7 +2777,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, 
val) VALUES (s1.id, s1.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.id = t1.id) WHEN MATCHED AND sub.val = 0 THEN @@ -1422,7 +2786,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet CREATE TABLE pg(val int); SELECT create_distributed_table('s1', 'id'); NOTICE: Copying data from local table... @@ -1443,7 +2807,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet -- Mix Postgres table in CTE WITH pg_res AS ( SELECT * FROM pg @@ -1456,7 +2820,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported with combination of distributed/local tables yet -- Match more than one source row should fail same as Postgres behavior SELECT undistribute_table('t1'); NOTICE: creating a new table for merge_schema.t1 @@ -1511,16 +2875,245 @@ WHEN NOT MATCHED THEN INSERT VALUES(mv_source.id, mv_source.val); ERROR: cannot execute MERGE on relation "mv_source" DETAIL: This operation is not supported for materialized views. +-- Distributed tables *must* be colocated +CREATE TABLE dist_target(id int, val varchar); +SELECT create_distributed_table('dist_target', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_source(id int, val varchar); +SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated +-- Distributed tables *must* be joined on distribution column +CREATE TABLE dist_colocated(id int, val int); +SELECT create_distributed_table('dist_colocated', 'id', colocate_with => 'dist_target'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +MERGE INTO dist_target +USING dist_colocated +ON dist_target.id = dist_colocated.val -- val is not the distribution column +WHEN MATCHED THEN +UPDATE SET val = dist_colocated.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_colocated.id, dist_colocated.val); +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +-- Both the source and target must be distributed +MERGE INTO dist_target +USING (SELECT 100 id) AS source +ON dist_target.id = source.id AND dist_target.val = 'const' +WHEN MATCHED THEN +UPDATE SET val = 'source' +WHEN NOT MATCHED THEN +INSERT VALUES(source.id, 'source'); +ERROR: For MERGE command, both the source and target must be distributed +-- Non-hash distributed tables 
(append/range). +CREATE VIEW show_tables AS +SELECT logicalrelid, partmethod +FROM pg_dist_partition +WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass) +ORDER BY 1; +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | h + dist_source | a +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | h + dist_source | r +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +-- Both are append tables +SELECT undistribute_table('dist_target'); +NOTICE: creating a new table for merge_schema.dist_target +NOTICE: moving the data of merge_schema.dist_target +NOTICE: dropping the old merge_schema.dist_target +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE" +NOTICE: renaming the new table to merge_schema.dist_target + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table 
+--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_target', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'append'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | a + dist_source | a +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead +-- Both are range tables +SELECT undistribute_table('dist_target'); +NOTICE: creating a new table for merge_schema.dist_target +NOTICE: moving the data of merge_schema.dist_target +NOTICE: dropping the old merge_schema.dist_target +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE" +NOTICE: renaming the new table to merge_schema.dist_target + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('dist_source'); +NOTICE: creating a new table for merge_schema.dist_source +NOTICE: moving the data of merge_schema.dist_source +NOTICE: dropping the old merge_schema.dist_source +NOTICE: drop cascades to view show_tables +CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE" +NOTICE: renaming the new table to merge_schema.dist_source + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_target', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('dist_source', 'id', 'range'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM show_tables; + logicalrelid | partmethod +--------------------------------------------------------------------- + dist_target | r + dist_source | r +(2 rows) + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); +ERROR: For MERGE command, all the distributed tables must be colocated, for append/range distribution, colocation is not supported +HINT: Consider using hash distribution instead DROP SERVER foreign_server CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to user mapping for postgres on server foreign_server -drop cascades to foreign table foreign_table_4000046 +drop cascades to foreign table foreign_table_4000043 drop cascades to foreign table foreign_table -NOTICE: foreign table "foreign_table_4000046" does not exist, skipping +NOTICE: foreign table "foreign_table_4000043" does not exist, skipping CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name, drop_shards_metadata_only := false)" PL/pgSQL function citus_drop_trigger() line XX at PERFORM +DROP 
FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; -NOTICE: drop cascades to 56 other objects +NOTICE: drop cascades to 84 other objects DETAIL: drop cascades to function insert_data() drop cascades to table pg_result drop cascades to table local_local @@ -1562,21 +3155,49 @@ drop cascades to table mv_target drop cascades to table mv_source_table drop cascades to materialized view mv_source drop cascades to table mv_local -drop cascades to table dist_table +drop cascades to table dist_table_4000041 drop cascades to function f_dist() drop cascades to table fn_target_4000040 drop cascades to table fn_result drop cascades to table fn_target +drop cascades to table dist_table drop cascades to table fn_local drop cascades to table ft_target -drop cascades to table ft_source_4000045 +drop cascades to table ft_source_4000042 drop cascades to table ft_source drop cascades to extension postgres_fdw +drop cascades to table target_cj +drop cascades to table source_cj1 +drop cascades to table source_cj2 +drop cascades to table pg_target +drop cascades to table pg_source +drop cascades to table citus_target +drop cascades to table citus_source +drop cascades to function compare_tables() +drop cascades to view pg_source_view +drop cascades to view citus_source_view +drop cascades to table pg_pa_target +drop cascades to table citus_pa_target +drop cascades to table pg_pa_source +drop cascades to table citus_pa_source +drop cascades to function pa_compare_tables() +drop cascades to table source_json +drop cascades to table target_json +drop cascades to function immutable_hash(integer) +drop cascades to table source_serial +drop cascades to table target_serial +drop cascades to table target_set +drop cascades to table source_set +drop cascades to function add_s(integer,integer) drop cascades to table pg -drop cascades to table t1_4000062 -drop cascades to table s1_4000063 +drop cascades to table t1_4000131 +drop cascades to table s1_4000132 drop cascades to table t1 drop cascades to table s1 +drop cascades to table dist_colocated +drop cascades to table dist_target +drop cascades to table dist_source +drop cascades to view show_tables SELECT 1 FROM master_remove_node('localhost', :master_port); ?column? 
--------------------------------------------------------------------- diff --git a/src/test/regress/expected/merge_arbitrary.out b/src/test/regress/expected/merge_arbitrary.out new file mode 100644 index 000000000..345ac1410 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary.out @@ -0,0 +1,150 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif +SET search_path TO merge_arbitrary_schema; +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-2 | 0 + 2 | source-2 | 0 + 3 | target | 0 +(4 rows) + +ROLLBACK; +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; + tid | src | val +--------------------------------------------------------------------- + 1 | target | 0 + 2 | source-1 | 20 + 2 | source-1 | 20 + 3 | target | 0 +(4 rows) + +ROLLBACK; +-- Test PREPARE +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +INSERT INTO prept VALUES(100, 0); +INSERT INTO preps VALUES(100, 0); +INSERT INTO preps VALUES(200, 0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +-- sixth time +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +-- Should have the counter as 14 (7 * 2) +SELECT * FROM prept; + t1 | t2 +--------------------------------------------------------------------- + 100 | 14 +(1 row) + +-- Test local tables +INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause +INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause +INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause +INSERT INTO t1 VALUES(1, 0); -- Will be deleted +INSERT INTO t1 VALUES(2, 0); -- Will be updated +INSERT INTO t1 VALUES(5, 0); -- Will be intact +PREPARE local(int, int) AS +WITH s1_res AS ( + SELECT * FROM s1 +) +MERGE INTO t1 + USING s1_res ON (s1_res.id = t1.id) + WHEN MATCHED AND s1_res.val = $1 THEN + DELETE + WHEN MATCHED THEN + UPDATE SET val = t1.val + $2 + WHEN NOT MATCHED THEN + INSERT (id, val) VALUES (s1_res.id, s1_res.val); +BEGIN; +EXECUTE local(0, 1); +SELECT * 
FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(5 rows) + +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +-- sixth time +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; + id | val +--------------------------------------------------------------------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 0 + 6 | 1 +(5 rows) + +ROLLBACK; diff --git a/src/test/regress/expected/merge_arbitrary_0.out b/src/test/regress/expected/merge_arbitrary_0.out new file mode 100644 index 000000000..a7e3fbf20 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_0.out @@ -0,0 +1,6 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q diff --git a/src/test/regress/expected/merge_arbitrary_create.out b/src/test/regress/expected/merge_arbitrary_create.out new file mode 100644 index 000000000..9b2444f17 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_create.out @@ -0,0 +1,72 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif +DROP SCHEMA IF EXISTS merge_arbitrary_schema CASCADE; +CREATE SCHEMA merge_arbitrary_schema; +SET search_path TO merge_arbitrary_schema; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 6000000; +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); +SELECT create_distributed_table('target_cj', 'tid'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj1', 'sid1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('source_cj2', 'sid2'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE prept(t1 int, t2 int); +CREATE TABLE preps(s1 int, s2 int); +SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + create_distributed_table | create_distributed_table +--------------------------------------------------------------------- + | +(1 row) + +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; +-- Citus local tables +CREATE TABLE t1(id int, val int); +CREATE TABLE s1(id int, val int); +SELECT citus_add_local_table_to_metadata('t1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_local_table_to_metadata('s1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/merge_arbitrary_create_0.out b/src/test/regress/expected/merge_arbitrary_create_0.out new file mode 100644 
index 000000000..a7e3fbf20 --- /dev/null +++ b/src/test/regress/expected/merge_arbitrary_create_0.out @@ -0,0 +1,6 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q diff --git a/src/test/regress/expected/metadata_sync_helpers.out b/src/test/regress/expected/metadata_sync_helpers.out index dc526afb0..f745b0fe2 100644 --- a/src/test/regress/expected/metadata_sync_helpers.out +++ b/src/test/regress/expected/metadata_sync_helpers.out @@ -713,13 +713,16 @@ BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SET application_name to 'citus_internal gpid=10000000001'; \set VERBOSITY terse - CREATE TABLE publication_test_table(id int); - CREATE PUBLICATION publication_test FOR TABLE publication_test_table; + CREATE OPERATOR === ( + LEFTARG = int, + RIGHTARG = int, + FUNCTION = int4eq + ); SET ROLE metadata_sync_helper_role; WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) - AS (VALUES ('publication', ARRAY['publication_test']::text[], ARRAY[]::text[], -1, 0, false)) + AS (VALUES ('operator', ARRAY['===']::text[], ARRAY['int','int']::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) FROM distributed_object_data; -ERROR: publication object can not be distributed by Citus +ERROR: operator object can not be distributed by Citus ROLLBACK; -- Show that citus_internal_add_object_metadata checks the priviliges BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; @@ -1194,15 +1197,6 @@ BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT citus_internal_delete_shard_metadata(shardid) FROM shard_data; ERROR: must be owner of table super_user_table ROLLBACK; --- the user only allowed to delete shards in a distributed transaction -BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; - SET application_name to 'citus_internal gpid=10000000001'; - \set VERBOSITY terse - WITH shard_data(shardid) - AS (VALUES (1420007)) - SELECT citus_internal_delete_shard_metadata(shardid) FROM shard_data; -ERROR: This is an internal Citus function can only be used in a distributed transaction -ROLLBACK; -- the user cannot delete non-existing shards BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT assign_distributed_transaction_id(0, 8, '2021-07-09 15:41:55.542377+02'); diff --git a/src/test/regress/expected/multi_cluster_management.out b/src/test/regress/expected/multi_cluster_management.out index 56235cc91..0bbbc6899 100644 --- a/src/test/regress/expected/multi_cluster_management.out +++ b/src/test/regress/expected/multi_cluster_management.out @@ -2,19 +2,22 @@ SET citus.next_shard_id TO 1220000; ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 1390000; ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART 1; -- Tests functions related to cluster membership --- add the nodes to the cluster +-- add the first node to the cluster in transactional mode SELECT 1 FROM master_add_node('localhost', :worker_1_port); ?column? --------------------------------------------------------------------- 1 (1 row) +-- add the second node in nontransactional mode +SET citus.metadata_sync_mode TO 'nontransactional'; SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? 
--------------------------------------------------------------------- 1 (1 row) +RESET citus.metadata_sync_mode; -- I am coordinator SELECT citus_is_coordinator(); citus_is_coordinator @@ -374,7 +377,7 @@ SELECT master_get_active_worker_nodes(); SELECT * FROM master_add_node('localhost', :worker_2_port); master_add_node --------------------------------------------------------------------- - 7 + 6 (1 row) ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART WITH 7; @@ -445,7 +448,7 @@ SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=1 WHERE g -- when there is no primary we should get a pretty error UPDATE pg_dist_node SET noderole = 'secondary' WHERE nodeport=:worker_2_port; SELECT * FROM cluster_management_test; -ERROR: node group 6 does not have a primary node +ERROR: node group 5 does not have a primary node -- when there is no node at all in the group we should get a different error DELETE FROM pg_dist_node WHERE nodeport=:worker_2_port; SELECT run_command_on_workers('DELETE FROM pg_dist_node WHERE nodeport=' || :'worker_2_port'); @@ -455,13 +458,12 @@ SELECT run_command_on_workers('DELETE FROM pg_dist_node WHERE nodeport=' || :'wo (1 row) SELECT * FROM cluster_management_test; -ERROR: there is a shard placement in node group 6 but there are no nodes in that group +ERROR: there is a shard placement in node group 5 but there are no nodes in that group -- clean-up SELECT * INTO old_placements FROM pg_dist_placement WHERE groupid = :worker_2_group; DELETE FROM pg_dist_placement WHERE groupid = :worker_2_group; SELECT master_add_node('localhost', :worker_2_port) AS new_node \gset WARNING: could not find any shard placements for shardId 1220001 -WARNING: could not find any shard placements for shardId 1220001 WARNING: could not find any shard placements for shardId 1220003 WARNING: could not find any shard placements for shardId 1220005 WARNING: could not find any shard placements for shardId 1220007 @@ -1202,6 +1204,33 @@ SELECT start_metadata_sync_to_all_nodes(); t (1 row) +-- nontransactional sync mode tests +SET citus.metadata_sync_mode TO 'nontransactional'; +-- do not allow nontransactional sync inside transaction block +BEGIN; + SELECT start_metadata_sync_to_all_nodes(); +ERROR: do not sync metadata in transaction block when the sync mode is nontransactional +HINT: resync after SET citus.metadata_sync_mode TO 'transactional' +COMMIT; +SELECT start_metadata_sync_to_all_nodes(); + start_metadata_sync_to_all_nodes +--------------------------------------------------------------------- + t +(1 row) + +-- do not allow nontransactional node addition inside transaction block +BEGIN; + SELECT citus_remove_node('localhost', :worker_1_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + + SELECT citus_add_node('localhost', :worker_1_port); +ERROR: do not add node in transaction block when the sync mode is nontransactional +HINT: add the node after SET citus.metadata_sync_mode TO 'transactional' +COMMIT; +RESET citus.metadata_sync_mode; -- verify that at the end of this file, all primary nodes have metadata synced SELECT bool_and(hasmetadata) AND bool_and(metadatasynced) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary'; ?column? 
diff --git a/src/test/regress/expected/multi_data_types.out b/src/test/regress/expected/multi_data_types.out index 4bc7da5c7..a88f9e1de 100644 --- a/src/test/regress/expected/multi_data_types.out +++ b/src/test/regress/expected/multi_data_types.out @@ -3,6 +3,14 @@ -- create, distribute, INSERT, SELECT and UPDATE -- =================================================================== SET citus.next_shard_id TO 530000; +-- Given that other test files depend on the existence of types created in this file, +-- we cannot drop them at the end. Instead, we drop them at the beginning of the test +-- to make this file runnable multiple times via run_test.py. +BEGIN; + SET LOCAL client_min_messages TO WARNING; + DROP TYPE IF EXISTS test_composite_type, other_composite_type, bug_status CASCADE; + DROP OPERATOR FAMILY IF EXISTS cats_op_fam USING hash; +COMMIT; -- create a custom type... CREATE TYPE test_composite_type AS ( i integer, diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 092ec9e5c..d0ed4f82a 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1258,6 +1258,43 @@ SELECT * FROM pg_dist_cleanup; 2 | 0 | 1 | table_with_orphaned_shards_102011 | 0 | 0 (2 rows) +ALTER EXTENSION citus_columnar UPDATE TO '11.2-1'; +-- Make sure that we defined dependencies from all rel objects (tables, +-- indexes, sequences ..) to columnar table access method ... +SELECT pg_class.oid INTO columnar_schema_members +FROM pg_class, pg_namespace +WHERE pg_namespace.oid=pg_class.relnamespace AND + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); +SELECT refobjid INTO columnar_schema_members_pg_depend +FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid AND + objid = (select oid from pg_am where amname = 'columnar') AND + objsubid = 0 AND + refclassid = 'pg_class'::regclass::oid AND + refobjsubid = 0 AND + deptype = 'n'; +-- ... , so this should be empty, +(TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) +UNION +(TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); + oid +--------------------------------------------------------------------- +(0 rows) + +-- ... , and both columnar_schema_members_pg_depend & columnar_schema_members +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- error out as cleanup records remain ALTER EXTENSION citus UPDATE TO '11.0-4'; ERROR: pg_dist_cleanup is introduced in Citus 11.1 @@ -1323,9 +1360,15 @@ SELECT * FROM multi_extension.print_extension_changes(); -- Snapshot of state at 11.3-1 ALTER EXTENSION citus UPDATE TO '11.3-1'; SELECT * FROM multi_extension.print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- -(0 rows) + | function citus_internal_is_replication_origin_tracking_active() boolean + | function citus_internal_mark_node_not_synced(integer,integer) void + | function citus_internal_start_replication_origin_tracking() void + | function citus_internal_stop_replication_origin_tracking() void + | function worker_adjust_identity_column_seq_ranges(regclass) void + | function worker_drop_all_shell_tables(boolean) +(6 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/multi_metadata_sync.out b/src/test/regress/expected/multi_metadata_sync.out index a17dc7634..f371e11e7 100644 --- a/src/test/regress/expected/multi_metadata_sync.out +++ b/src/test/regress/expected/multi_metadata_sync.out @@ -69,9 +69,10 @@ ALTER ROLE CURRENT_USER WITH PASSWORD 'dummypassword'; -- Show that, with no MX tables, activate node snapshot contains only the delete commands, -- pg_dist_node entries, pg_dist_object entries and roles. SELECT unnest(activate_node_snapshot()) order by 1; - unnest + unnest --------------------------------------------------------------------- ALTER DATABASE regression OWNER TO postgres; + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner DELETE FROM pg_catalog.pg_dist_colocation DELETE FROM pg_catalog.pg_dist_object @@ -89,18 +90,20 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT alter_role_if_exists('postgres', 'ALTER ROLE postgres SET lc_messages = ''C''') SELECT pg_catalog.worker_drop_sequence_dependency(logicalrelid::regclass::text) FROM pg_dist_partition SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, 
objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; -(29 rows) + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; +(32 rows) -- Create a test table with constraints and SERIAL and default from user defined sequence CREATE SEQUENCE user_defined_seq; @@ -127,6 +130,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) USING heap DELETE FROM pg_catalog.pg_dist_colocation @@ -135,6 +139,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS public.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -150,21 +155,26 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('public.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET 
citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + 
WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('public.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('public.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('public.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('public.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('public.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('public.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('public.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(42 rows) +(49 rows) -- Show that CREATE INDEX commands are included in the activate node snapshot CREATE INDEX mx_index ON mx_test_table(col_2); @@ -176,6 +186,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON public.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) USING heap @@ -185,6 +196,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS public.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -200,21 +212,26 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT 
worker_create_truncate_trigger('public.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, 
force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('public.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('public.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('public.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('public.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('public.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('public.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('public.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(43 rows) +(50 rows) -- Show that schema changes are included in the activate node snapshot CREATE SCHEMA mx_testing_schema; @@ -227,6 +244,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner @@ -237,6 +255,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -252,21 +271,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN 
REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT 
citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Show that append distributed tables are not included in the activate node snapshot CREATE TABLE non_mx_test_table (col_1 int, col_2 text); @@ -285,6 +310,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner @@ -295,6 +321,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM 
pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -310,21 +337,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], 
ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Show that range distributed tables are not included in the activate node 
snapshot UPDATE pg_dist_partition SET partmethod='r' WHERE logicalrelid='non_mx_test_table'::regclass; @@ -336,6 +369,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner @@ -346,6 +380,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -361,21 +396,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', 
ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 
't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Test start_metadata_sync_to_node and citus_activate_node UDFs -- Ensure that hasmetadata=false for all nodes @@ -1761,6 +1802,7 @@ ALTER TABLE dist_table_1 ADD COLUMN b int; ERROR: localhost:xxxxx is a metadata node, but is out of sync HINT: If the node is up, wait until metadata gets synced to it and try again. SELECT master_add_node('localhost', :master_port, groupid => 0); +NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ERROR: localhost:xxxxx is a metadata node, but is out of sync HINT: If the node is up, wait until metadata gets synced to it and try again. SELECT citus_disable_node_and_wait('localhost', :worker_1_port); @@ -1836,7 +1878,7 @@ ALTER TABLE test_table ADD COLUMN id2 int DEFAULT nextval('mx_test_sequence_1'); ALTER TABLE test_table ALTER COLUMN id2 DROP DEFAULT; ALTER TABLE test_table ALTER COLUMN id2 SET DEFAULT nextval('mx_test_sequence_1'); SELECT unnest(activate_node_snapshot()) order by 1; - unnest + unnest --------------------------------------------------------------------- ALTER DATABASE regression OWNER TO postgres; ALTER SEQUENCE mx_testing_schema.mx_test_table_col_3_seq OWNER TO postgres @@ -1854,6 +1896,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER TABLE public.dist_table_1 OWNER TO postgres ALTER TABLE public.mx_ref OWNER TO postgres ALTER TABLE public.test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE INDEX mx_index_1 ON mx_test_schema_1.mx_table_1 USING btree (col1) CREATE INDEX mx_index_2 ON mx_test_schema_2.mx_table_2 USING btree (col2) @@ -1874,6 +1917,12 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_test_schema_1.mx_table_1 CASCADE + DROP TABLE IF EXISTS mx_test_schema_2.mx_table_2 CASCADE + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE + DROP TABLE IF EXISTS public.dist_table_1 CASCADE + DROP TABLE IF EXISTS public.mx_ref CASCADE + DROP TABLE IF EXISTS public.test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -1901,18 +1950,35 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_create_truncate_trigger('public.dist_table_1') SELECT worker_create_truncate_trigger('public.mx_ref') SELECT worker_create_truncate_trigger('public.test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET 
citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 - WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10009, 1, -1, 0, NULL, NULL), (10010, 4, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_test_schema_1', 'mx_table_1']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_test_schema_2', 'mx_table_2']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_ref']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'dist_table_1']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_sequence_0']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_sequence_1']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema_2']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_test_schema_1']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_test_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 + WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10009, 1, -1, 0, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) + WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10010, 4, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, 
distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_test_schema_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_test_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_sequence_0']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM 
distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_sequence_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_test_schema_1', 'mx_table_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_test_schema_2', 'mx_table_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'dist_table_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_ref']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 5, 100001), (1310002, 0, 1, 100002), (1310003, 0, 5, 100003), (1310004, 0, 1, 100004), (1310005, 0, 5, 100005), (1310006, 0, 1, 100006), (1310007, 0, 5, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310020, 0, 1, 100020), (1310021, 0, 5, 100021), 
(1310022, 0, 1, 100022), (1310023, 0, 5, 100023), (1310024, 0, 1, 100024)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310025, 0, 1, 100025), (1310026, 0, 5, 100026), (1310027, 0, 1, 100027), (1310028, 0, 5, 100028), (1310029, 0, 1, 100029)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; @@ -1925,7 +1991,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.dist_table_1'::regclass, 1310074, 't'::"char", '-2147483648', '-1073741825'), ('public.dist_table_1'::regclass, 1310075, 't'::"char", '-1073741824', '-1'), ('public.dist_table_1'::regclass, 1310076, 't'::"char", '0', '1073741823'), ('public.dist_table_1'::regclass, 1310077, 't'::"char", '1073741824', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_ref'::regclass, 1310073, 't'::"char", NULL, NULL)) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.test_table'::regclass, 1310083, 't'::"char", '-2147483648', '-1073741825'), ('public.test_table'::regclass, 1310084, 't'::"char", '-1073741824', '-1'), ('public.test_table'::regclass, 1310085, 't'::"char", '0', '1073741823'), ('public.test_table'::regclass, 1310086, 't'::"char", '1073741824', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(87 rows) +(111 rows) -- shouldn't work since test_table is MX ALTER TABLE test_table ADD COLUMN id3 bigserial; diff --git a/src/test/regress/expected/multi_metadata_sync_0.out b/src/test/regress/expected/multi_metadata_sync_0.out index b7998db1e..5d5aa56dd 100644 --- a/src/test/regress/expected/multi_metadata_sync_0.out +++ b/src/test/regress/expected/multi_metadata_sync_0.out @@ -69,9 +69,10 @@ ALTER ROLE CURRENT_USER WITH PASSWORD 'dummypassword'; -- Show that, with no MX tables, activate node snapshot contains only the delete commands, -- pg_dist_node entries, pg_dist_object entries and roles. 
SELECT unnest(activate_node_snapshot()) order by 1; - unnest + unnest --------------------------------------------------------------------- ALTER DATABASE regression OWNER TO postgres; + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres DELETE FROM pg_catalog.pg_dist_colocation DELETE FROM pg_catalog.pg_dist_object @@ -89,18 +90,20 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT alter_role_if_exists('postgres', 'ALTER ROLE postgres SET lc_messages = ''C''') SELECT pg_catalog.worker_drop_sequence_dependency(logicalrelid::regclass::text) FROM pg_dist_partition SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; -(29 rows) + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; +(32 rows) -- Create a test table with constraints and SERIAL and default from user defined sequence CREATE SEQUENCE user_defined_seq; @@ -127,6 +130,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE 
public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) USING heap DELETE FROM pg_catalog.pg_dist_colocation @@ -135,6 +139,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS public.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -150,21 +155,26 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('public.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH 
distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('public.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('public.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('public.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('public.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('public.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('public.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('public.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(42 rows) +(49 rows) -- Show that CREATE INDEX commands are 
included in the activate node snapshot CREATE INDEX mx_index ON mx_test_table(col_2); @@ -176,6 +186,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON public.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) USING heap @@ -185,6 +196,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS public.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -200,21 +212,26 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('public.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), 
('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('public.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('public.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('public.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('public.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('public.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('public.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), 
('public.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(43 rows) +(50 rows) -- Show that schema changes are included in the activate node snapshot CREATE SCHEMA mx_testing_schema; @@ -227,6 +244,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres @@ -237,6 +255,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -252,21 +271,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], 
ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, 
shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Show that append distributed tables are not included in the activate node snapshot CREATE TABLE non_mx_test_table (col_1 int, col_2 text); @@ -285,6 +310,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres @@ -295,6 +321,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -310,21 +337,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 
'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, 
force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Show that range distributed tables are not included in the activate node snapshot UPDATE pg_dist_partition SET partmethod='r' WHERE logicalrelid='non_mx_test_table'::regclass; @@ -336,6 +369,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres @@ -346,6 +380,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -361,21 +396,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT 
worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, 
force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Test start_metadata_sync_to_node and citus_activate_node UDFs -- Ensure that hasmetadata=false for all nodes @@ -1761,6 +1802,7 @@ ALTER TABLE dist_table_1 ADD COLUMN b int; ERROR: localhost:xxxxx is a metadata node, but is out of sync HINT: If the node is up, wait until metadata gets synced to it and try again. SELECT master_add_node('localhost', :master_port, groupid => 0); +NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ERROR: localhost:xxxxx is a metadata node, but is out of sync HINT: If the node is up, wait until metadata gets synced to it and try again. 
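As a point of reference for the hasmetadata/metadatasynced/isactive updates and the "metadata node, but is out of sync" errors shown in this expected output, the sync state they describe lives in pg_dist_node and can be checked with a plain catalog query. This is only an illustrative sketch for a coordinator session, not part of the test output above:
-- hypothetical manual check of per-node metadata sync state on the coordinator
SELECT nodeid, nodename, nodeport, hasmetadata, metadatasynced, isactive
FROM pg_dist_node
ORDER BY nodeid;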
SELECT citus_disable_node_and_wait('localhost', :worker_1_port); @@ -1836,7 +1878,7 @@ ALTER TABLE test_table ADD COLUMN id2 int DEFAULT nextval('mx_test_sequence_1'); ALTER TABLE test_table ALTER COLUMN id2 DROP DEFAULT; ALTER TABLE test_table ALTER COLUMN id2 SET DEFAULT nextval('mx_test_sequence_1'); SELECT unnest(activate_node_snapshot()) order by 1; - unnest + unnest --------------------------------------------------------------------- ALTER DATABASE regression OWNER TO postgres; ALTER SEQUENCE mx_testing_schema.mx_test_table_col_3_seq OWNER TO postgres @@ -1854,6 +1896,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER TABLE public.dist_table_1 OWNER TO postgres ALTER TABLE public.mx_ref OWNER TO postgres ALTER TABLE public.test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE INDEX mx_index_1 ON mx_test_schema_1.mx_table_1 USING btree (col1) CREATE INDEX mx_index_2 ON mx_test_schema_2.mx_table_2 USING btree (col2) @@ -1874,6 +1917,12 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_test_schema_1.mx_table_1 CASCADE + DROP TABLE IF EXISTS mx_test_schema_2.mx_table_2 CASCADE + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE + DROP TABLE IF EXISTS public.dist_table_1 CASCADE + DROP TABLE IF EXISTS public.mx_ref CASCADE + DROP TABLE IF EXISTS public.test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -1901,18 +1950,35 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_create_truncate_trigger('public.dist_table_1') SELECT worker_create_truncate_trigger('public.mx_ref') SELECT worker_create_truncate_trigger('public.test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 - WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10009, 1, -1, 0, NULL, NULL), (10010, 4, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_test_schema_1', 'mx_table_1']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_test_schema_2', 'mx_table_2']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 
'mx_ref']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'dist_table_1']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_sequence_0']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_sequence_1']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema_2']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_test_schema_1']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_test_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 + WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10009, 1, -1, 0, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) + WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10010, 4, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_test_schema_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, 
force_delegation) AS (VALUES ('schema', ARRAY['mx_test_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_sequence_0']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_sequence_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_test_schema_1', 'mx_table_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_test_schema_2', 'mx_table_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT 
citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'dist_table_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_ref']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 5, 100001), (1310002, 0, 1, 100002), (1310003, 0, 5, 100003), (1310004, 0, 1, 100004), (1310005, 0, 5, 100005), (1310006, 0, 1, 100006), (1310007, 0, 5, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310020, 0, 1, 100020), (1310021, 0, 5, 100021), (1310022, 0, 1, 100022), (1310023, 0, 5, 100023), (1310024, 0, 1, 100024)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310025, 0, 1, 100025), (1310026, 0, 5, 100026), (1310027, 0, 1, 100027), (1310028, 0, 5, 100028), (1310029, 0, 1, 100029)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; @@ -1925,7 +1991,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.dist_table_1'::regclass, 1310074, 't'::"char", '-2147483648', '-1073741825'), ('public.dist_table_1'::regclass, 1310075, 't'::"char", '-1073741824', '-1'), ('public.dist_table_1'::regclass, 1310076, 't'::"char", '0', '1073741823'), ('public.dist_table_1'::regclass, 1310077, 't'::"char", '1073741824', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_ref'::regclass, 1310073, 't'::"char", NULL, NULL)) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM 
shard_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.test_table'::regclass, 1310083, 't'::"char", '-2147483648', '-1073741825'), ('public.test_table'::regclass, 1310084, 't'::"char", '-1073741824', '-1'), ('public.test_table'::regclass, 1310085, 't'::"char", '0', '1073741823'), ('public.test_table'::regclass, 1310086, 't'::"char", '1073741824', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(87 rows) +(111 rows) -- shouldn't work since test_table is MX ALTER TABLE test_table ADD COLUMN id3 bigserial; diff --git a/src/test/regress/expected/multi_modifications.out b/src/test/regress/expected/multi_modifications.out index 5b5764593..887003a97 100644 --- a/src/test/regress/expected/multi_modifications.out +++ b/src/test/regress/expected/multi_modifications.out @@ -177,7 +177,7 @@ INSERT INTO limit_orders VALUES (random() * 100, 'ORCL', 152, '2011-08-25 11:50: INSERT INTO limit_orders VALUES (2036, 'GOOG', 5634, now(), 'buy', random()); -- commands with mutable functions in their quals DELETE FROM limit_orders WHERE id = 246 AND bidder_id = (random() * 1000); -ERROR: functions used in the WHERE clause of modification queries on distributed tables must not be VOLATILE +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE -- commands with mutable but non-volatile functions(ie: stable func.) in their quals -- (the cast to timestamp is because the timestamp_eq_timestamptz operator is stable) DELETE FROM limit_orders WHERE id = 246 AND placed_at = current_timestamp::timestamp; diff --git a/src/test/regress/expected/multi_modifying_xacts.out b/src/test/regress/expected/multi_modifying_xacts.out index 607c327ff..99cdc9ce4 100644 --- a/src/test/regress/expected/multi_modifying_xacts.out +++ b/src/test/regress/expected/multi_modifying_xacts.out @@ -1,5 +1,7 @@ SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; +CREATE SCHEMA multi_modifying_xacts; +SET search_path TO multi_modifying_xacts; -- =================================================================== -- test end-to-end modification functionality -- =================================================================== @@ -127,12 +129,25 @@ BEGIN; INSERT INTO researchers VALUES (8, 5, 'Douglas Engelbart'); INSERT INTO labs VALUES (5, 'Los Alamos'); COMMIT; +SET citus.enable_non_colocated_router_query_pushdown TO ON; SELECT * FROM researchers, labs WHERE labs.id = researchers.lab_id AND researchers.lab_id = 5; id | lab_id | name | id | name --------------------------------------------------------------------- 8 | 5 | Douglas Engelbart | 5 | Los Alamos (1 row) +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +-- fails because researchers and labs are not colocated +SELECT * FROM researchers, labs WHERE labs.id = researchers.lab_id AND researchers.lab_id = 5; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- works thanks to "OFFSET 0" trick +SELECT * FROM (SELECT * FROM researchers OFFSET 0) researchers, labs WHERE labs.id = researchers.lab_id AND researchers.lab_id = 5; + id | lab_id | name | id | name +--------------------------------------------------------------------- + 8 | 5 | Douglas Engelbart | 5 | Los Alamos +(1 row) + +RESET citus.enable_non_colocated_router_query_pushdown; -- and the other way around is 
also allowed BEGIN; INSERT INTO labs VALUES (6, 'Bell Labs'); @@ -190,7 +205,7 @@ ALTER TABLE labs ADD COLUMN motto text; INSERT INTO labs VALUES (6, 'Bell Labs'); ABORT; -- but the DDL should correctly roll back -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.labs'::regclass; +SELECT "Column", "Type", "Modifiers" FROM public.table_desc WHERE relid='multi_modifying_xacts.labs'::regclass; Column | Type | Modifiers --------------------------------------------------------------------- id | bigint | not null @@ -339,7 +354,7 @@ CREATE FUNCTION reject_large_id() RETURNS trigger AS $rli$ END; $rli$ LANGUAGE plpgsql; -- register after insert trigger -SELECT * FROM run_command_on_placements('researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_large_id()') +SELECT * FROM run_command_on_placements('multi_modifying_xacts.researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE multi_modifying_xacts.reject_large_id()') ORDER BY nodeport, shardid; nodename | nodeport | shardid | success | result --------------------------------------------------------------------- @@ -498,6 +513,7 @@ AND s.logicalrelid = 'objects'::regclass; -- create trigger on one worker to reject certain values \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -514,6 +530,7 @@ AFTER INSERT ON objects_1200003 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- test partial failure; worker_1 succeeds, 2 fails -- in this case, we expect the transaction to abort \set VERBOSITY terse @@ -551,6 +568,7 @@ DELETE FROM objects; -- there cannot be errors on different shards at different times -- because the first failure will fail the whole transaction \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -567,6 +585,7 @@ AFTER INSERT ON labs_1200002 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; BEGIN; INSERT INTO objects VALUES (1, 'apple'); INSERT INTO objects VALUES (2, 'BAD'); @@ -602,12 +621,14 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if the failures happen at COMMIT time? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; CREATE CONSTRAINT TRIGGER reject_bad AFTER INSERT ON objects_1200003 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- should be the same story as before, just at COMMIT time -- as we use 2PC, the transaction is rollbacked BEGIN; @@ -644,12 +665,14 @@ WHERE sp.shardid = s.shardid AND s.logicalrelid = 'objects'::regclass; -- what if all nodes have failures at COMMIT time? 
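For orientation, the COMMIT-time failure cases that follow rely on Citus' two-phase commit records, and the same kind of catalog check the test itself runs further down (SELECT count(*) FROM pg_dist_transaction) can be issued by hand to see whether any prepared-transaction records were left behind. An illustrative sketch only, assuming a coordinator session, not part of the expected output:
-- hypothetical manual check for leftover 2PC records after an aborted multi-shard write
SELECT count(*) AS two_phase_records
FROM pg_dist_transaction;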
\c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON labs_1200002; CREATE CONSTRAINT TRIGGER reject_bad AFTER INSERT ON labs_1200002 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- reduce the log level for differences between PG14 and PG15 -- in PGconn->errorMessage -- relevant PG commit b15f254466aefbabcbed001929f6e09db59fd158 @@ -688,8 +711,10 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if one shard (objects) succeeds but another (labs) completely fails? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; \c - - - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200004; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -833,6 +858,7 @@ SELECT * FROM reference_modifying_xacts; -- lets fail on of the workers at before the commit time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_reference() RETURNS trigger AS $rb$ BEGIN @@ -849,6 +875,7 @@ AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -860,12 +887,14 @@ ERROR: illegal value COMMIT; -- lets fail one of the workers at COMMIT time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -890,8 +919,10 @@ ORDER BY s.logicalrelid, sp.shardstate; -- for the time-being drop the constraint \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- now create a hash distributed table and run tests -- including both the reference table and the hash -- distributed table @@ -923,6 +954,7 @@ INSERT INTO hash_modifying_xacts VALUES (2, 2); ABORT; -- lets fail one of the workers before COMMIT time for the hash table \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_hash() RETURNS trigger AS $rb$ BEGIN @@ -939,6 +971,7 @@ AFTER INSERT ON hash_modifying_xacts_1200007 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail BEGIN; @@ -955,6 +988,7 @@ SELECT * FROM reference_modifying_xacts WHERE key = 55; -- now lets fail on of the workers for the hash distributed table table -- when there is a reference table involved \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_hash ON hash_modifying_xacts_1200007; -- the trigger is on execution time CREATE CONSTRAINT TRIGGER reject_bad_hash @@ -962,6 +996,7 
@@ AFTER INSERT ON hash_modifying_xacts_1200007 DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail BEGIN; @@ -994,11 +1029,13 @@ ORDER BY s.logicalrelid, sp.shardstate; -- and ensure that hash distributed table's -- change is rollbacked as well \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse BEGIN; -- to expand participant to include all worker nodes @@ -1127,8 +1164,10 @@ SELECT count(*) FROM pg_dist_transaction; -- in which we'll make the remote host unavailable -- first create the new user on all nodes CREATE USER test_user; +GRANT ALL ON SCHEMA multi_modifying_xacts TO test_user; -- now connect back to the master with the new user \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200015; CREATE TABLE reference_failure_test (key int, value int); SELECT create_reference_table('reference_failure_test'); @@ -1148,21 +1187,24 @@ SELECT create_distributed_table('numbers_hash_failure_test', 'key'); -- ensure that the shard is created for this user \c - test_user - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.override_table_visibility TO false; \dt reference_failure_test_1200015 - List of relations - Schema | Name | Type | Owner + List of relations + Schema | Name | Type | Owner --------------------------------------------------------------------- - public | reference_failure_test_1200015 | table | test_user + multi_modifying_xacts | reference_failure_test_1200015 | table | test_user (1 row) -- now connect with the default user, -- and rename the existing user \c - :default_user - :worker_1_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; NOTICE: not propagating ALTER ROLE ... RENAME TO commands to worker nodes -- connect back to master and query the reference table \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- should fail since the worker doesn't have test_user anymore INSERT INTO reference_failure_test VALUES (1, '1'); ERROR: connection to the remote node localhost:xxxxx failed with the following error: FATAL: role "test_user" does not exist @@ -1277,14 +1319,17 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin -- break the other node as well \c - :default_user - :worker_2_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; NOTICE: not propagating ALTER ROLE ... 
RENAME TO commands to worker nodes \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- fails on all shard placements INSERT INTO numbers_hash_failure_test VALUES (2,2); ERROR: connection to the remote node localhost:xxxxx failed with the following error: FATAL: role "test_user" does not exist -- connect back to the master with the proper user to continue the tests \c - :default_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200020; SET citus.next_placement_id TO 1200033; -- unbreak both nodes by renaming the user back to the original name @@ -1297,6 +1342,7 @@ SELECT * FROM run_command_on_workers('ALTER USER test_user_new RENAME TO test_us DROP TABLE reference_modifying_xacts, hash_modifying_xacts, hash_modifying_xacts_second, reference_failure_test, numbers_hash_failure_test; +REVOKE ALL ON SCHEMA multi_modifying_xacts FROM test_user; DROP USER test_user; -- set up foreign keys to test transactions with co-located and reference tables BEGIN; @@ -1322,7 +1368,9 @@ SELECT create_reference_table('itemgroups'); (1 row) +SET client_min_messages TO WARNING; DROP TABLE IF EXISTS users ; +RESET client_min_messages; CREATE TABLE users ( id int PRIMARY KEY, name text, @@ -1354,18 +1402,18 @@ JOIN USING (shardid) ORDER BY id; - id | shard_name | nodename | nodeport + id | shard_name | nodename | nodeport --------------------------------------------------------------------- - 1 | users_1200022 | localhost | 57637 - 2 | users_1200025 | localhost | 57638 - 3 | users_1200023 | localhost | 57638 - 4 | users_1200023 | localhost | 57638 - 5 | users_1200022 | localhost | 57637 - 6 | users_1200024 | localhost | 57637 - 7 | users_1200023 | localhost | 57638 - 8 | users_1200022 | localhost | 57637 - 9 | users_1200025 | localhost | 57638 - 10 | users_1200022 | localhost | 57637 + 1 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 2 | multi_modifying_xacts.users_1200025 | localhost | 57638 + 3 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 4 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 5 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 6 | multi_modifying_xacts.users_1200024 | localhost | 57637 + 7 | multi_modifying_xacts.users_1200023 | localhost | 57638 + 8 | multi_modifying_xacts.users_1200022 | localhost | 57637 + 9 | multi_modifying_xacts.users_1200025 | localhost | 57638 + 10 | multi_modifying_xacts.users_1200022 | localhost | 57637 (10 rows) END; @@ -1546,5 +1594,5 @@ SELECT name FROM labs WHERE id = 1001; (1 row) RESET citus.function_opens_transaction_block; -DROP FUNCTION insert_abort(); -DROP TABLE items, users, itemgroups, usergroups, researchers, labs; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_modifying_xacts CASCADE; diff --git a/src/test/regress/expected/multi_move_mx.out b/src/test/regress/expected/multi_move_mx.out index 833c9f7df..b6cc5d0d7 100644 --- a/src/test/regress/expected/multi_move_mx.out +++ b/src/test/regress/expected/multi_move_mx.out @@ -238,8 +238,40 @@ ORDER BY LIMIT 1 OFFSET 1; ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. +-- Check that shards of a table with GENERATED columns can be moved. 
+\c - - - :master_port +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +CREATE TABLE mx_table_with_generated_column (a int, b int GENERATED ALWAYS AS ( a + 3 ) STORED, c int); +SELECT create_distributed_table('mx_table_with_generated_column', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Check that dropped columns are handled properly in a move. +ALTER TABLE mx_table_with_generated_column DROP COLUMN c; +-- Move a shard from worker 1 to worker 2 +SELECT + citus_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_with_generated_column'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + -- Cleanup \c - - - :master_port +SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); +DROP TABLE mx_table_with_generated_column; DROP TABLE mx_table_1; DROP TABLE mx_table_2; DROP TABLE mx_table_3; diff --git a/src/test/regress/expected/multi_multiuser_auth.out b/src/test/regress/expected/multi_multiuser_auth.out index 15d34b563..4b7c6fcc7 100644 --- a/src/test/regress/expected/multi_multiuser_auth.out +++ b/src/test/regress/expected/multi_multiuser_auth.out @@ -22,7 +22,7 @@ SELECT nodeid AS worker_1_id FROM pg_dist_node WHERE nodename = 'localhost' AND SELECT nodeid AS worker_2_id FROM pg_dist_node WHERE nodename = 'localhost' AND nodeport = :worker_2_port; worker_2_id --------------------------------------------------------------------- - 18 + 35 (1 row) \gset diff --git a/src/test/regress/expected/multi_mx_copy_data.out b/src/test/regress/expected/multi_mx_copy_data.out index c1d3d7180..0db64c16e 100644 --- a/src/test/regress/expected/multi_mx_copy_data.out +++ b/src/test/regress/expected/multi_mx_copy_data.out @@ -1,6 +1,10 @@ -- -- MULTI_MX_COPY_DATA -- +-- We truncate them to make this test runnable multiple times. +-- Note that we cannot do that at the end of the test because +-- we need to keep the data for the other tests. 
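-- A minimal sketch (not from this patch) of the rerunnability pattern described in
-- the comment above: shared tables are reset at the start of the file, because later
-- test files still read their data, so no cleanup can happen at the end. Table and
-- data-file names here are placeholders:
TRUNCATE shared_dist_table;
\copy shared_dist_table FROM 'data/shared_dist_table.data' with delimiter '|'
-- ... run the assertions that consume shared_dist_table ...
-- intentionally no DROP or TRUNCATE at the end of the file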
+TRUNCATE lineitem_mx, orders_mx; \set nation_data_file :abs_srcdir '/data/nation.data' \set client_side_copy_command '\\copy nation_hash FROM ' :'nation_data_file' ' with delimiter '''|''';' :client_side_copy_command @@ -161,3 +165,4 @@ SET search_path TO public; :client_side_copy_command \set client_side_copy_command '\\copy supplier_mx FROM ' :'supplier_data_file' ' with delimiter '''|''';' :client_side_copy_command +DROP TABLE citus_mx_test_schema.nation_hash_replicated; diff --git a/src/test/regress/expected/multi_mx_create_table.out b/src/test/regress/expected/multi_mx_create_table.out index ffbaa738e..6bdef048a 100644 --- a/src/test/regress/expected/multi_mx_create_table.out +++ b/src/test/regress/expected/multi_mx_create_table.out @@ -497,22 +497,22 @@ ORDER BY table_name::text; SELECT shard_name, table_name, citus_table_type, shard_size FROM citus_shards ORDER BY shard_name::text; shard_name | table_name | citus_table_type | shard_size --------------------------------------------------------------------- - app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220097 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 0 - app_analytics_events_mx_1220099 | app_analytics_events_mx | distributed | 0 + app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220096 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220097 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220098 | app_analytics_events_mx | distributed | 8192 + app_analytics_events_mx_1220099 | app_analytics_events_mx | 
distributed | 8192 articles_hash_mx_1220104 | articles_hash_mx | distributed | 0 articles_hash_mx_1220104 | articles_hash_mx | distributed | 0 articles_hash_mx_1220104 | articles_hash_mx | distributed | 0 @@ -608,22 +608,22 @@ SELECT shard_name, table_name, citus_table_type, shard_size FROM citus_shards OR citus_mx_test_schema.nation_hash_collation_search_path_1220046 | citus_mx_test_schema.nation_hash_collation_search_path | distributed | 0 citus_mx_test_schema.nation_hash_collation_search_path_1220046 | citus_mx_test_schema.nation_hash_collation_search_path | distributed | 0 citus_mx_test_schema.nation_hash_collation_search_path_1220047 | citus_mx_test_schema.nation_hash_collation_search_path | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220049 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 - citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 0 - citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 0 - citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 0 - citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 0 - citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 0 - citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 0 - citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 0 - citus_mx_test_schema.nation_hash_composite_types_1220051 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 + citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 + citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 + citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 + citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 + citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 + citus_mx_test_schema.nation_hash_composite_types_1220048 | 
citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 + citus_mx_test_schema.nation_hash_composite_types_1220048 | citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 + citus_mx_test_schema.nation_hash_composite_types_1220049 | citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 + citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 + citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 + citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 + citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 + citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 + citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 + citus_mx_test_schema.nation_hash_composite_types_1220050 | citus_mx_test_schema.nation_hash_composite_types | distributed | 8192 + citus_mx_test_schema.nation_hash_composite_types_1220051 | citus_mx_test_schema.nation_hash_composite_types | distributed | 16384 citus_mx_test_schema_join_1.nation_hash_1220032 | citus_mx_test_schema_join_1.nation_hash | distributed | 0 citus_mx_test_schema_join_1.nation_hash_1220032 | citus_mx_test_schema_join_1.nation_hash | distributed | 0 citus_mx_test_schema_join_1.nation_hash_1220032 | citus_mx_test_schema_join_1.nation_hash | distributed | 0 @@ -696,109 +696,109 @@ SELECT shard_name, table_name, citus_table_type, shard_size FROM citus_shards OR customer_mx_1220084 | customer_mx | reference | 0 customer_mx_1220084 | customer_mx | reference | 0 customer_mx_1220084 | customer_mx | reference | 0 - labs_mx_1220102 | labs_mx | distributed | 0 - labs_mx_1220102 | labs_mx | distributed | 0 - labs_mx_1220102 | labs_mx | distributed | 0 - labs_mx_1220102 | labs_mx | distributed | 0 - labs_mx_1220102 | labs_mx | distributed | 0 - labs_mx_1220102 | labs_mx | distributed | 0 - labs_mx_1220102 | labs_mx | distributed | 0 - limit_orders_mx_1220092 | limit_orders_mx | distributed | 0 - limit_orders_mx_1220092 | limit_orders_mx | distributed | 0 - limit_orders_mx_1220092 | limit_orders_mx | distributed | 0 - limit_orders_mx_1220092 | limit_orders_mx | distributed | 0 - limit_orders_mx_1220092 | limit_orders_mx | distributed | 0 - limit_orders_mx_1220092 | limit_orders_mx | distributed | 0 - limit_orders_mx_1220092 | limit_orders_mx | distributed | 0 - limit_orders_mx_1220093 | limit_orders_mx | distributed | 0 - lineitem_mx_1220052 | lineitem_mx | distributed | 0 - lineitem_mx_1220052 | lineitem_mx | distributed | 0 - lineitem_mx_1220052 | lineitem_mx | distributed | 0 - lineitem_mx_1220052 | lineitem_mx | distributed | 0 - lineitem_mx_1220052 | lineitem_mx | distributed | 0 - lineitem_mx_1220052 | lineitem_mx | distributed | 0 - lineitem_mx_1220052 | lineitem_mx | distributed | 0 - lineitem_mx_1220053 | lineitem_mx | distributed | 0 - lineitem_mx_1220054 | lineitem_mx | distributed | 0 - lineitem_mx_1220054 | lineitem_mx | distributed | 0 - lineitem_mx_1220054 | lineitem_mx | distributed | 0 - lineitem_mx_1220054 | lineitem_mx | distributed | 0 - lineitem_mx_1220054 | lineitem_mx | distributed | 0 - lineitem_mx_1220054 | lineitem_mx | distributed | 0 - 
lineitem_mx_1220054 | lineitem_mx | distributed | 0 - lineitem_mx_1220055 | lineitem_mx | distributed | 0 - lineitem_mx_1220056 | lineitem_mx | distributed | 0 - lineitem_mx_1220056 | lineitem_mx | distributed | 0 - lineitem_mx_1220056 | lineitem_mx | distributed | 0 - lineitem_mx_1220056 | lineitem_mx | distributed | 0 - lineitem_mx_1220056 | lineitem_mx | distributed | 0 - lineitem_mx_1220056 | lineitem_mx | distributed | 0 - lineitem_mx_1220056 | lineitem_mx | distributed | 0 - lineitem_mx_1220057 | lineitem_mx | distributed | 0 - lineitem_mx_1220058 | lineitem_mx | distributed | 0 - lineitem_mx_1220058 | lineitem_mx | distributed | 0 - lineitem_mx_1220058 | lineitem_mx | distributed | 0 - lineitem_mx_1220058 | lineitem_mx | distributed | 0 - lineitem_mx_1220058 | lineitem_mx | distributed | 0 - lineitem_mx_1220058 | lineitem_mx | distributed | 0 - lineitem_mx_1220058 | lineitem_mx | distributed | 0 - lineitem_mx_1220059 | lineitem_mx | distributed | 0 - lineitem_mx_1220060 | lineitem_mx | distributed | 0 - lineitem_mx_1220060 | lineitem_mx | distributed | 0 - lineitem_mx_1220060 | lineitem_mx | distributed | 0 - lineitem_mx_1220060 | lineitem_mx | distributed | 0 - lineitem_mx_1220060 | lineitem_mx | distributed | 0 - lineitem_mx_1220060 | lineitem_mx | distributed | 0 - lineitem_mx_1220060 | lineitem_mx | distributed | 0 - lineitem_mx_1220061 | lineitem_mx | distributed | 0 - lineitem_mx_1220062 | lineitem_mx | distributed | 0 - lineitem_mx_1220062 | lineitem_mx | distributed | 0 - lineitem_mx_1220062 | lineitem_mx | distributed | 0 - lineitem_mx_1220062 | lineitem_mx | distributed | 0 - lineitem_mx_1220062 | lineitem_mx | distributed | 0 - lineitem_mx_1220062 | lineitem_mx | distributed | 0 - lineitem_mx_1220062 | lineitem_mx | distributed | 0 - lineitem_mx_1220063 | lineitem_mx | distributed | 0 - lineitem_mx_1220064 | lineitem_mx | distributed | 0 - lineitem_mx_1220064 | lineitem_mx | distributed | 0 - lineitem_mx_1220064 | lineitem_mx | distributed | 0 - lineitem_mx_1220064 | lineitem_mx | distributed | 0 - lineitem_mx_1220064 | lineitem_mx | distributed | 0 - lineitem_mx_1220064 | lineitem_mx | distributed | 0 - lineitem_mx_1220064 | lineitem_mx | distributed | 0 - lineitem_mx_1220065 | lineitem_mx | distributed | 0 - lineitem_mx_1220066 | lineitem_mx | distributed | 0 - lineitem_mx_1220066 | lineitem_mx | distributed | 0 - lineitem_mx_1220066 | lineitem_mx | distributed | 0 - lineitem_mx_1220066 | lineitem_mx | distributed | 0 - lineitem_mx_1220066 | lineitem_mx | distributed | 0 - lineitem_mx_1220066 | lineitem_mx | distributed | 0 - lineitem_mx_1220066 | lineitem_mx | distributed | 0 - lineitem_mx_1220067 | lineitem_mx | distributed | 0 - multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 0 - multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 0 - multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 0 - multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 0 - multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 0 - multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 0 - multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 0 - multiple_hash_mx_1220095 | multiple_hash_mx | distributed | 0 - mx_ddl_table_1220088 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220088 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220088 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220088 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220088 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220088 | mx_ddl_table | 
distributed | 8192 - mx_ddl_table_1220088 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220089 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220090 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220090 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220090 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220090 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220090 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220090 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220090 | mx_ddl_table | distributed | 8192 - mx_ddl_table_1220091 | mx_ddl_table | distributed | 8192 + labs_mx_1220102 | labs_mx | distributed | 8192 + labs_mx_1220102 | labs_mx | distributed | 8192 + labs_mx_1220102 | labs_mx | distributed | 8192 + labs_mx_1220102 | labs_mx | distributed | 8192 + labs_mx_1220102 | labs_mx | distributed | 8192 + labs_mx_1220102 | labs_mx | distributed | 8192 + labs_mx_1220102 | labs_mx | distributed | 8192 + limit_orders_mx_1220092 | limit_orders_mx | distributed | 16384 + limit_orders_mx_1220092 | limit_orders_mx | distributed | 16384 + limit_orders_mx_1220092 | limit_orders_mx | distributed | 16384 + limit_orders_mx_1220092 | limit_orders_mx | distributed | 16384 + limit_orders_mx_1220092 | limit_orders_mx | distributed | 16384 + limit_orders_mx_1220092 | limit_orders_mx | distributed | 16384 + limit_orders_mx_1220092 | limit_orders_mx | distributed | 16384 + limit_orders_mx_1220093 | limit_orders_mx | distributed | 16384 + lineitem_mx_1220052 | lineitem_mx | distributed | 16384 + lineitem_mx_1220052 | lineitem_mx | distributed | 16384 + lineitem_mx_1220052 | lineitem_mx | distributed | 16384 + lineitem_mx_1220052 | lineitem_mx | distributed | 16384 + lineitem_mx_1220052 | lineitem_mx | distributed | 16384 + lineitem_mx_1220052 | lineitem_mx | distributed | 16384 + lineitem_mx_1220052 | lineitem_mx | distributed | 16384 + lineitem_mx_1220053 | lineitem_mx | distributed | 16384 + lineitem_mx_1220054 | lineitem_mx | distributed | 16384 + lineitem_mx_1220054 | lineitem_mx | distributed | 16384 + lineitem_mx_1220054 | lineitem_mx | distributed | 16384 + lineitem_mx_1220054 | lineitem_mx | distributed | 16384 + lineitem_mx_1220054 | lineitem_mx | distributed | 16384 + lineitem_mx_1220054 | lineitem_mx | distributed | 16384 + lineitem_mx_1220054 | lineitem_mx | distributed | 16384 + lineitem_mx_1220055 | lineitem_mx | distributed | 16384 + lineitem_mx_1220056 | lineitem_mx | distributed | 16384 + lineitem_mx_1220056 | lineitem_mx | distributed | 16384 + lineitem_mx_1220056 | lineitem_mx | distributed | 16384 + lineitem_mx_1220056 | lineitem_mx | distributed | 16384 + lineitem_mx_1220056 | lineitem_mx | distributed | 16384 + lineitem_mx_1220056 | lineitem_mx | distributed | 16384 + lineitem_mx_1220056 | lineitem_mx | distributed | 16384 + lineitem_mx_1220057 | lineitem_mx | distributed | 16384 + lineitem_mx_1220058 | lineitem_mx | distributed | 16384 + lineitem_mx_1220058 | lineitem_mx | distributed | 16384 + lineitem_mx_1220058 | lineitem_mx | distributed | 16384 + lineitem_mx_1220058 | lineitem_mx | distributed | 16384 + lineitem_mx_1220058 | lineitem_mx | distributed | 16384 + lineitem_mx_1220058 | lineitem_mx | distributed | 16384 + lineitem_mx_1220058 | lineitem_mx | distributed | 16384 + lineitem_mx_1220059 | lineitem_mx | distributed | 16384 + lineitem_mx_1220060 | lineitem_mx | distributed | 16384 + lineitem_mx_1220060 | lineitem_mx | distributed | 16384 + lineitem_mx_1220060 | lineitem_mx | distributed | 16384 + lineitem_mx_1220060 | lineitem_mx | 
distributed | 16384 + lineitem_mx_1220060 | lineitem_mx | distributed | 16384 + lineitem_mx_1220060 | lineitem_mx | distributed | 16384 + lineitem_mx_1220060 | lineitem_mx | distributed | 16384 + lineitem_mx_1220061 | lineitem_mx | distributed | 16384 + lineitem_mx_1220062 | lineitem_mx | distributed | 16384 + lineitem_mx_1220062 | lineitem_mx | distributed | 16384 + lineitem_mx_1220062 | lineitem_mx | distributed | 16384 + lineitem_mx_1220062 | lineitem_mx | distributed | 16384 + lineitem_mx_1220062 | lineitem_mx | distributed | 16384 + lineitem_mx_1220062 | lineitem_mx | distributed | 16384 + lineitem_mx_1220062 | lineitem_mx | distributed | 16384 + lineitem_mx_1220063 | lineitem_mx | distributed | 16384 + lineitem_mx_1220064 | lineitem_mx | distributed | 16384 + lineitem_mx_1220064 | lineitem_mx | distributed | 16384 + lineitem_mx_1220064 | lineitem_mx | distributed | 16384 + lineitem_mx_1220064 | lineitem_mx | distributed | 16384 + lineitem_mx_1220064 | lineitem_mx | distributed | 16384 + lineitem_mx_1220064 | lineitem_mx | distributed | 16384 + lineitem_mx_1220064 | lineitem_mx | distributed | 16384 + lineitem_mx_1220065 | lineitem_mx | distributed | 16384 + lineitem_mx_1220066 | lineitem_mx | distributed | 16384 + lineitem_mx_1220066 | lineitem_mx | distributed | 16384 + lineitem_mx_1220066 | lineitem_mx | distributed | 16384 + lineitem_mx_1220066 | lineitem_mx | distributed | 16384 + lineitem_mx_1220066 | lineitem_mx | distributed | 16384 + lineitem_mx_1220066 | lineitem_mx | distributed | 16384 + lineitem_mx_1220066 | lineitem_mx | distributed | 16384 + lineitem_mx_1220067 | lineitem_mx | distributed | 16384 + multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 8192 + multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 8192 + multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 8192 + multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 8192 + multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 8192 + multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 8192 + multiple_hash_mx_1220094 | multiple_hash_mx | distributed | 8192 + multiple_hash_mx_1220095 | multiple_hash_mx | distributed | 8192 + mx_ddl_table_1220088 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220088 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220088 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220088 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220088 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220088 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220088 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220089 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220090 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220090 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220090 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220090 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220090 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220090 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220090 | mx_ddl_table | distributed | 24576 + mx_ddl_table_1220091 | mx_ddl_table | distributed | 24576 nation_hash_1220000 | nation_hash | distributed | 0 nation_hash_1220000 | nation_hash | distributed | 0 nation_hash_1220000 | nation_hash | distributed | 0 @@ -871,77 +871,77 @@ SELECT shard_name, table_name, citus_table_type, shard_size FROM citus_shards OR nation_mx_1220085 | nation_mx | reference | 0 nation_mx_1220085 | nation_mx | reference | 0 nation_mx_1220085 | nation_mx | reference | 0 - 
objects_mx_1220103 | objects_mx | distributed | 0 - objects_mx_1220103 | objects_mx | distributed | 0 - objects_mx_1220103 | objects_mx | distributed | 0 - objects_mx_1220103 | objects_mx | distributed | 0 - objects_mx_1220103 | objects_mx | distributed | 0 - objects_mx_1220103 | objects_mx | distributed | 0 - objects_mx_1220103 | objects_mx | distributed | 0 - orders_mx_1220068 | orders_mx | distributed | 0 - orders_mx_1220068 | orders_mx | distributed | 0 - orders_mx_1220068 | orders_mx | distributed | 0 - orders_mx_1220068 | orders_mx | distributed | 0 - orders_mx_1220068 | orders_mx | distributed | 0 - orders_mx_1220068 | orders_mx | distributed | 0 - orders_mx_1220068 | orders_mx | distributed | 0 - orders_mx_1220069 | orders_mx | distributed | 0 - orders_mx_1220070 | orders_mx | distributed | 0 - orders_mx_1220070 | orders_mx | distributed | 0 - orders_mx_1220070 | orders_mx | distributed | 0 - orders_mx_1220070 | orders_mx | distributed | 0 - orders_mx_1220070 | orders_mx | distributed | 0 - orders_mx_1220070 | orders_mx | distributed | 0 - orders_mx_1220070 | orders_mx | distributed | 0 - orders_mx_1220071 | orders_mx | distributed | 0 - orders_mx_1220072 | orders_mx | distributed | 0 - orders_mx_1220072 | orders_mx | distributed | 0 - orders_mx_1220072 | orders_mx | distributed | 0 - orders_mx_1220072 | orders_mx | distributed | 0 - orders_mx_1220072 | orders_mx | distributed | 0 - orders_mx_1220072 | orders_mx | distributed | 0 - orders_mx_1220072 | orders_mx | distributed | 0 - orders_mx_1220073 | orders_mx | distributed | 0 - orders_mx_1220074 | orders_mx | distributed | 0 - orders_mx_1220074 | orders_mx | distributed | 0 - orders_mx_1220074 | orders_mx | distributed | 0 - orders_mx_1220074 | orders_mx | distributed | 0 - orders_mx_1220074 | orders_mx | distributed | 0 - orders_mx_1220074 | orders_mx | distributed | 0 - orders_mx_1220074 | orders_mx | distributed | 0 - orders_mx_1220075 | orders_mx | distributed | 0 - orders_mx_1220076 | orders_mx | distributed | 0 - orders_mx_1220076 | orders_mx | distributed | 0 - orders_mx_1220076 | orders_mx | distributed | 0 - orders_mx_1220076 | orders_mx | distributed | 0 - orders_mx_1220076 | orders_mx | distributed | 0 - orders_mx_1220076 | orders_mx | distributed | 0 - orders_mx_1220076 | orders_mx | distributed | 0 - orders_mx_1220077 | orders_mx | distributed | 0 - orders_mx_1220078 | orders_mx | distributed | 0 - orders_mx_1220078 | orders_mx | distributed | 0 - orders_mx_1220078 | orders_mx | distributed | 0 - orders_mx_1220078 | orders_mx | distributed | 0 - orders_mx_1220078 | orders_mx | distributed | 0 - orders_mx_1220078 | orders_mx | distributed | 0 - orders_mx_1220078 | orders_mx | distributed | 0 - orders_mx_1220079 | orders_mx | distributed | 0 - orders_mx_1220080 | orders_mx | distributed | 0 - orders_mx_1220080 | orders_mx | distributed | 0 - orders_mx_1220080 | orders_mx | distributed | 0 - orders_mx_1220080 | orders_mx | distributed | 0 - orders_mx_1220080 | orders_mx | distributed | 0 - orders_mx_1220080 | orders_mx | distributed | 0 - orders_mx_1220080 | orders_mx | distributed | 0 - orders_mx_1220081 | orders_mx | distributed | 0 - orders_mx_1220082 | orders_mx | distributed | 0 - orders_mx_1220082 | orders_mx | distributed | 0 - orders_mx_1220082 | orders_mx | distributed | 0 - orders_mx_1220082 | orders_mx | distributed | 0 - orders_mx_1220082 | orders_mx | distributed | 0 - orders_mx_1220082 | orders_mx | distributed | 0 - orders_mx_1220082 | orders_mx | distributed | 0 - orders_mx_1220083 | orders_mx | 
distributed | 0 + objects_mx_1220103 | objects_mx | distributed | 16384 + objects_mx_1220103 | objects_mx | distributed | 16384 + objects_mx_1220103 | objects_mx | distributed | 16384 + objects_mx_1220103 | objects_mx | distributed | 16384 + objects_mx_1220103 | objects_mx | distributed | 16384 + objects_mx_1220103 | objects_mx | distributed | 16384 + objects_mx_1220103 | objects_mx | distributed | 16384 + orders_mx_1220068 | orders_mx | distributed | 8192 + orders_mx_1220068 | orders_mx | distributed | 8192 + orders_mx_1220068 | orders_mx | distributed | 8192 + orders_mx_1220068 | orders_mx | distributed | 8192 + orders_mx_1220068 | orders_mx | distributed | 8192 + orders_mx_1220068 | orders_mx | distributed | 8192 + orders_mx_1220068 | orders_mx | distributed | 8192 + orders_mx_1220069 | orders_mx | distributed | 8192 + orders_mx_1220070 | orders_mx | distributed | 8192 + orders_mx_1220070 | orders_mx | distributed | 8192 + orders_mx_1220070 | orders_mx | distributed | 8192 + orders_mx_1220070 | orders_mx | distributed | 8192 + orders_mx_1220070 | orders_mx | distributed | 8192 + orders_mx_1220070 | orders_mx | distributed | 8192 + orders_mx_1220070 | orders_mx | distributed | 8192 + orders_mx_1220071 | orders_mx | distributed | 8192 + orders_mx_1220072 | orders_mx | distributed | 8192 + orders_mx_1220072 | orders_mx | distributed | 8192 + orders_mx_1220072 | orders_mx | distributed | 8192 + orders_mx_1220072 | orders_mx | distributed | 8192 + orders_mx_1220072 | orders_mx | distributed | 8192 + orders_mx_1220072 | orders_mx | distributed | 8192 + orders_mx_1220072 | orders_mx | distributed | 8192 + orders_mx_1220073 | orders_mx | distributed | 8192 + orders_mx_1220074 | orders_mx | distributed | 8192 + orders_mx_1220074 | orders_mx | distributed | 8192 + orders_mx_1220074 | orders_mx | distributed | 8192 + orders_mx_1220074 | orders_mx | distributed | 8192 + orders_mx_1220074 | orders_mx | distributed | 8192 + orders_mx_1220074 | orders_mx | distributed | 8192 + orders_mx_1220074 | orders_mx | distributed | 8192 + orders_mx_1220075 | orders_mx | distributed | 8192 + orders_mx_1220076 | orders_mx | distributed | 8192 + orders_mx_1220076 | orders_mx | distributed | 8192 + orders_mx_1220076 | orders_mx | distributed | 8192 + orders_mx_1220076 | orders_mx | distributed | 8192 + orders_mx_1220076 | orders_mx | distributed | 8192 + orders_mx_1220076 | orders_mx | distributed | 8192 + orders_mx_1220076 | orders_mx | distributed | 8192 + orders_mx_1220077 | orders_mx | distributed | 8192 + orders_mx_1220078 | orders_mx | distributed | 8192 + orders_mx_1220078 | orders_mx | distributed | 8192 + orders_mx_1220078 | orders_mx | distributed | 8192 + orders_mx_1220078 | orders_mx | distributed | 8192 + orders_mx_1220078 | orders_mx | distributed | 8192 + orders_mx_1220078 | orders_mx | distributed | 8192 + orders_mx_1220078 | orders_mx | distributed | 8192 + orders_mx_1220079 | orders_mx | distributed | 8192 + orders_mx_1220080 | orders_mx | distributed | 8192 + orders_mx_1220080 | orders_mx | distributed | 8192 + orders_mx_1220080 | orders_mx | distributed | 8192 + orders_mx_1220080 | orders_mx | distributed | 8192 + orders_mx_1220080 | orders_mx | distributed | 8192 + orders_mx_1220080 | orders_mx | distributed | 8192 + orders_mx_1220080 | orders_mx | distributed | 8192 + orders_mx_1220081 | orders_mx | distributed | 8192 + orders_mx_1220082 | orders_mx | distributed | 8192 + orders_mx_1220082 | orders_mx | distributed | 8192 + orders_mx_1220082 | orders_mx | distributed | 8192 + orders_mx_1220082 
| orders_mx | distributed | 8192 + orders_mx_1220082 | orders_mx | distributed | 8192 + orders_mx_1220082 | orders_mx | distributed | 8192 + orders_mx_1220082 | orders_mx | distributed | 8192 + orders_mx_1220083 | orders_mx | distributed | 8192 part_mx_1220086 | part_mx | reference | 0 part_mx_1220086 | part_mx | reference | 0 part_mx_1220086 | part_mx | reference | 0 @@ -950,14 +950,14 @@ SELECT shard_name, table_name, citus_table_type, shard_size FROM citus_shards OR part_mx_1220086 | part_mx | reference | 0 part_mx_1220086 | part_mx | reference | 0 part_mx_1220086 | part_mx | reference | 0 - researchers_mx_1220100 | researchers_mx | distributed | 0 - researchers_mx_1220100 | researchers_mx | distributed | 0 - researchers_mx_1220100 | researchers_mx | distributed | 0 - researchers_mx_1220100 | researchers_mx | distributed | 0 - researchers_mx_1220100 | researchers_mx | distributed | 0 - researchers_mx_1220100 | researchers_mx | distributed | 0 - researchers_mx_1220100 | researchers_mx | distributed | 0 - researchers_mx_1220101 | researchers_mx | distributed | 0 + researchers_mx_1220100 | researchers_mx | distributed | 8192 + researchers_mx_1220100 | researchers_mx | distributed | 8192 + researchers_mx_1220100 | researchers_mx | distributed | 8192 + researchers_mx_1220100 | researchers_mx | distributed | 8192 + researchers_mx_1220100 | researchers_mx | distributed | 8192 + researchers_mx_1220100 | researchers_mx | distributed | 8192 + researchers_mx_1220100 | researchers_mx | distributed | 8192 + researchers_mx_1220101 | researchers_mx | distributed | 8192 supplier_mx_1220087 | supplier_mx | reference | 0 supplier_mx_1220087 | supplier_mx | reference | 0 supplier_mx_1220087 | supplier_mx | reference | 0 diff --git a/src/test/regress/expected/multi_mx_hide_shard_names.out b/src/test/regress/expected/multi_mx_hide_shard_names.out index c3dbe3bdb..01d9736f2 100644 --- a/src/test/regress/expected/multi_mx_hide_shard_names.out +++ b/src/test/regress/expected/multi_mx_hide_shard_names.out @@ -444,7 +444,7 @@ SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_name (4 rows) -- or, we set it to walsender --- the shards and indexes do show up +-- the shards and indexes do not show up SELECT set_backend_type(9); NOTICE: backend type switched to: walsender set_backend_type @@ -452,6 +452,17 @@ NOTICE: backend type switched to: walsender (1 row) +SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; + relname +--------------------------------------------------------------------- + test_index + test_table + test_table_102008 + test_table_2_1130000 +(4 rows) + +-- unless the application name starts with citus_shard +SET application_name = 'citus_shard_move'; SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; relname --------------------------------------------------------------------- @@ -467,6 +478,7 @@ SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_name test_table_2_1130000 (10 rows) +RESET application_name; -- but, client backends to see the shards SELECT set_backend_type(3); NOTICE: backend type switched to: client backend diff --git a/src/test/regress/expected/multi_mx_modifications.out b/src/test/regress/expected/multi_mx_modifications.out index 276766c30..9e053d3f2 100644 --- a/src/test/regress/expected/multi_mx_modifications.out +++ b/src/test/regress/expected/multi_mx_modifications.out @@ -95,7 +95,7 @@ INSERT INTO 
limit_orders_mx VALUES (random() * 100, 'ORCL', 152, '2011-08-25 11: INSERT INTO limit_orders_mx VALUES (2036, 'GOOG', 5634, now(), 'buy', random()); -- commands with mutable functions in their quals DELETE FROM limit_orders_mx WHERE id = 246 AND bidder_id = (random() * 1000); -ERROR: functions used in the WHERE clause of modification queries on distributed tables must not be VOLATILE +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE -- commands with mutable but non-volatile functions(ie: stable func.) in their quals -- (the cast to timestamp is because the timestamp_eq_timestamptz operator is stable) DELETE FROM limit_orders_mx WHERE id = 246 AND placed_at = current_timestamp::timestamp; diff --git a/src/test/regress/expected/multi_mx_modifying_xacts.out b/src/test/regress/expected/multi_mx_modifying_xacts.out index e486b8b1b..a9013889f 100644 --- a/src/test/regress/expected/multi_mx_modifying_xacts.out +++ b/src/test/regress/expected/multi_mx_modifying_xacts.out @@ -129,12 +129,25 @@ BEGIN; INSERT INTO researchers_mx VALUES (8, 5, 'Douglas Engelbart'); INSERT INTO labs_mx VALUES (5, 'Los Alamos'); COMMIT; -SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5;; +SET citus.enable_non_colocated_router_query_pushdown TO ON; +SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5 ORDER BY 1,2,3,4,5; id | lab_id | name | id | name --------------------------------------------------------------------- 8 | 5 | Douglas Engelbart | 5 | Los Alamos (1 row) +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +-- fails because researchers and labs are not colocated +SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- works thanks to "OFFSET 0" trick +SELECT * FROM (SELECT * FROM researchers_mx OFFSET 0) researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5 ORDER BY 1,2,3,4,5; + id | lab_id | name | id | name +--------------------------------------------------------------------- + 8 | 5 | Douglas Engelbart | 5 | Los Alamos +(1 row) + +RESET citus.enable_non_colocated_router_query_pushdown; -- and the other way around is also allowed BEGIN; SET LOCAL citus.enable_local_execution TO off; @@ -148,7 +161,8 @@ BEGIN; INSERT INTO researchers_mx VALUES (8, 5, 'Douglas Engelbart'); INSERT INTO labs_mx VALUES (5, 'Los Alamos'); COMMIT; -SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5; +SET citus.enable_non_colocated_router_query_pushdown TO ON; +SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5 ORDER BY 1,2,3,4,5; id | lab_id | name | id | name --------------------------------------------------------------------- 8 | 5 | Douglas Engelbart | 5 | Los Alamos @@ -157,6 +171,21 @@ SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id a 8 | 5 | Douglas Engelbart | 5 | Los Alamos (4 rows) +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +-- fails because researchers and labs are not colocated +SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5; +ERROR: complex joins are only supported when all 
distributed tables are joined on their distribution columns with equal operator +-- works thanks to "OFFSET 0" trick +SELECT * FROM (SELECT * FROM researchers_mx OFFSET 0) researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5 ORDER BY 1,2,3,4,5; + id | lab_id | name | id | name +--------------------------------------------------------------------- + 8 | 5 | Douglas Engelbart | 5 | Los Alamos + 8 | 5 | Douglas Engelbart | 5 | Los Alamos + 8 | 5 | Douglas Engelbart | 5 | Los Alamos + 8 | 5 | Douglas Engelbart | 5 | Los Alamos +(4 rows) + +RESET citus.enable_non_colocated_router_query_pushdown; -- and the other way around is also allowed BEGIN; SET LOCAL citus.enable_local_execution TO off; @@ -406,3 +435,6 @@ SELECT * FROM labs_mx WHERE id = 8; --------------------------------------------------------------------- (0 rows) +TRUNCATE objects_mx, labs_mx, researchers_mx; +DROP TRIGGER reject_bad_mx ON labs_mx_1220102; +DROP FUNCTION reject_bad_mx; diff --git a/src/test/regress/expected/multi_mx_router_planner.out b/src/test/regress/expected/multi_mx_router_planner.out index d006b4bb8..cba5b8181 100644 --- a/src/test/regress/expected/multi_mx_router_planner.out +++ b/src/test/regress/expected/multi_mx_router_planner.out @@ -586,11 +586,13 @@ DEBUG: query has a single distribution column value: 10 (3 rows) -- following join is router plannable since the same worker --- has both shards +-- has both shards when citus.enable_non_colocated_router_query_pushdown +-- is enabled +SET citus.enable_non_colocated_router_query_pushdown TO ON; SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles_hash_mx a, articles_single_shard_hash_mx b WHERE a.author_id = 10 and a.author_id = b.author_id - LIMIT 3; + ORDER by 1,2 LIMIT 3; DEBUG: Creating router plan DEBUG: query has a single distribution column value: 10 first_author | second_word_count @@ -600,6 +602,45 @@ DEBUG: query has a single distribution column value: 10 10 | 19519 (3 rows) +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +-- but this is not the case otherwise +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash_mx a, articles_single_shard_hash_mx b + WHERE a.author_id = 10 and a.author_id = b.author_id + ORDER by 1,2 LIMIT 3; +DEBUG: router planner does not support queries that reference non-colocated distributed tables +DEBUG: push down of limit count: 3 +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating 
dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 8 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 8 +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning +RESET citus.enable_non_colocated_router_query_pushdown; -- following join is not router plannable since there are no -- workers containing both shards, but will work through recursive -- planning @@ -1460,3 +1501,7 @@ DEBUG: query has a single distribution column value: 1 51 (6 rows) +SET client_min_messages to WARNING; +TRUNCATE articles_hash_mx, company_employees_mx, articles_single_shard_hash_mx; +DROP MATERIALIZED VIEW mv_articles_hash_mx_error; +DROP TABLE authors_hash_mx; diff --git a/src/test/regress/expected/multi_mx_schema_support.out b/src/test/regress/expected/multi_mx_schema_support.out index 2037a670f..4e61d85d8 100644 --- a/src/test/regress/expected/multi_mx_schema_support.out +++ b/src/test/regress/expected/multi_mx_schema_support.out @@ -74,6 +74,7 @@ FETCH BACKWARD test_cursor; END; -- test inserting to table in different schema SET search_path TO public; +DELETE from citus_mx_test_schema.nation_hash where n_nationkey = 100; -- allow rerunning this file INSERT INTO citus_mx_test_schema.nation_hash(n_nationkey, n_name, n_regionkey) VALUES (100, 'TURKEY', 3); -- verify insertion SELECT * FROM citus_mx_test_schema.nation_hash WHERE n_nationkey = 100; @@ -84,6 +85,7 @@ SELECT * FROM citus_mx_test_schema.nation_hash WHERE n_nationkey = 100; -- test with search_path is set SET search_path TO citus_mx_test_schema; +DELETE from nation_hash where n_nationkey = 101; -- allow rerunning this file INSERT INTO nation_hash(n_nationkey, n_name, n_regionkey) VALUES (101, 'GERMANY', 3); -- verify insertion SELECT * FROM nation_hash WHERE n_nationkey = 101; @@ -458,14 +460,14 @@ SELECT create_distributed_table('mx_old_schema.table_set_schema', 'id'); (1 row) CREATE SCHEMA mx_new_schema; -SELECT objid::oid::regnamespace as "Distributed Schemas" +SELECT objid::oid::regnamespace::text as "Distributed Schemas" FROM pg_catalog.pg_dist_object WHERE objid::oid::regnamespace IN ('mx_old_schema', 'mx_new_schema') ORDER BY "Distributed Schemas"; Distributed Schemas --------------------------------------------------------------------- - mx_old_schema mx_new_schema + mx_old_schema (2 rows) \c - - - :worker_1_port @@ -490,13 +492,14 @@ ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. 
\c - - - :master_port ALTER TABLE mx_old_schema.table_set_schema SET SCHEMA mx_new_schema; -SELECT objid::oid::regnamespace as "Distributed Schemas" +SELECT objid::oid::regnamespace::text as "Distributed Schemas" FROM pg_catalog.pg_dist_object - WHERE objid::oid::regnamespace IN ('mx_old_schema', 'mx_new_schema'); + WHERE objid::oid::regnamespace IN ('mx_old_schema', 'mx_new_schema') + ORDER BY "Distributed Schemas"; Distributed Schemas --------------------------------------------------------------------- - mx_old_schema mx_new_schema + mx_old_schema (2 rows) \c - - - :worker_1_port @@ -542,7 +545,7 @@ SET client_min_messages TO ERROR; CREATE ROLE schema_owner WITH LOGIN; RESET client_min_messages; SELECT run_command_on_workers($$SET citus.enable_ddl_propagation TO OFF;CREATE ROLE schema_owner WITH LOGIN;RESET citus.enable_ddl_propagation;$$); - run_command_on_workers + run_command_on_workers --------------------------------------------------------------------- (localhost,57637,t,SET) (localhost,57638,t,SET) @@ -567,7 +570,7 @@ SET client_min_messages TO ERROR; CREATE ROLE role_to_be_granted WITH LOGIN; RESET client_min_messages; SELECT run_command_on_workers($$SET citus.enable_ddl_propagation TO OFF;CREATE ROLE role_to_be_granted WITH LOGIN;RESET citus.enable_ddl_propagation;$$); - run_command_on_workers + run_command_on_workers --------------------------------------------------------------------- (localhost,57637,t,SET) (localhost,57638,t,SET) @@ -616,3 +619,6 @@ NOTICE: drop cascades to table new_schema.t1 DROP SCHEMA mx_old_schema CASCADE; DROP SCHEMA mx_new_schema CASCADE; NOTICE: drop cascades to table mx_new_schema.table_set_schema +DROP SCHEMA localschema; +DROP ROLE schema_owner; +DROP ROLE role_to_be_granted; diff --git a/src/test/regress/expected/multi_poolinfo_usage.out b/src/test/regress/expected/multi_poolinfo_usage.out index c5e97ec95..b428409ff 100644 --- a/src/test/regress/expected/multi_poolinfo_usage.out +++ b/src/test/regress/expected/multi_poolinfo_usage.out @@ -16,7 +16,7 @@ SELECT nodeid AS worker_1_id FROM pg_dist_node WHERE nodename = 'localhost' AND SELECT nodeid AS worker_2_id FROM pg_dist_node WHERE nodename = 'localhost' AND nodeport = :worker_2_port; worker_2_id --------------------------------------------------------------------- - 18 + 35 (1 row) \gset diff --git a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index 56ff44b3b..1553309d2 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ -6,6 +6,8 @@ SET citus.next_shard_id TO 840000; -- router planner, so we're disabling it in this file. We've bunch of -- other tests that triggers fast-path-router planner SET citus.enable_fast_path_router_planner TO false; +CREATE SCHEMA multi_router_planner; +SET search_path TO multi_router_planner; CREATE TABLE articles_hash ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -290,10 +292,10 @@ WITH first_author AS MATERIALIZED ( UPDATE articles_hash SET title = first_author.name FROM first_author WHERE articles_hash.author_id = 2 AND articles_hash.id = first_author.id; DEBUG: Router planner doesn't support SELECT FOR UPDATE in common table expressions involving reference tables. 
-DEBUG: generating subplan XXX_1 for CTE first_author: SELECT articles_hash.id, auref.name FROM public.articles_hash, public.authors_reference auref WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (auref.id OPERATOR(pg_catalog.=) articles_hash.author_id)) FOR UPDATE OF articles_hash FOR UPDATE OF auref +DEBUG: generating subplan XXX_1 for CTE first_author: SELECT articles_hash.id, auref.name FROM multi_router_planner.articles_hash, multi_router_planner.authors_reference auref WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (auref.id OPERATOR(pg_catalog.=) articles_hash.author_id)) FOR UPDATE OF articles_hash FOR UPDATE OF auref DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 -DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE public.articles_hash SET title = first_author.name FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name character varying(20))) first_author WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (articles_hash.id OPERATOR(pg_catalog.=) first_author.id)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE multi_router_planner.articles_hash SET title = first_author.name FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name character varying(20))) first_author WHERE ((articles_hash.author_id OPERATOR(pg_catalog.=) 2) AND (articles_hash.id OPERATOR(pg_catalog.=) first_author.id)) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 WITH first_author AS MATERIALIZED ( @@ -356,10 +358,10 @@ WITH id_author AS MATERIALIZED ( SELECT id, author_id FROM articles_hash WHERE a id_title AS MATERIALIZED (SELECT id, title from articles_hash WHERE author_id = 2) SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; DEBUG: cannot run command which targets multiple shards -DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM public.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) +DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM multi_router_planner.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 -DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM public.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) +DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM multi_router_planner.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id_author.id, id_author.author_id, id_title.id, id_title.title FROM (SELECT intermediate_result.id, intermediate_result.author_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint)) id_author, (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title character varying(20))) id_title WHERE (id_author.id OPERATOR(pg_catalog.=) id_title.id) @@ -456,7 +458,7 @@ WITH new_article AS MATERIALIZED( ) SELECT * FROM new_article; DEBUG: only SELECT, UPDATE, or DELETE common 
table expressions may be router planned -DEBUG: generating subplan XXX_1 for CTE new_article: INSERT INTO public.articles_hash (id, author_id, title, word_count) VALUES (1, 1, 'arsenous'::character varying, 9) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE new_article: INSERT INTO multi_router_planner.articles_hash (id, author_id, title, word_count) VALUES (1, 1, 'arsenous'::character varying, 9) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) new_article @@ -471,7 +473,7 @@ WITH update_article AS MATERIALIZED( ) SELECT * FROM update_article; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.articles_hash SET word_count = 10 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 9)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.articles_hash SET word_count = 10 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 9)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) update_article DEBUG: Creating router plan @@ -485,7 +487,7 @@ WITH update_article AS MATERIALIZED ( ) SELECT coalesce(1,random()); DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.articles_hash SET word_count = 11 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.articles_hash SET word_count = 11 WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT COALESCE((1)::double precision, random()) AS "coalesce" DEBUG: Creating router plan @@ -510,7 +512,7 @@ WITH update_article AS MATERIALIZED ( ) SELECT coalesce(1,random()); DEBUG: cannot router plan modification of a non-distributed table -DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE public.authors_reference SET name = ''::character varying WHERE (id OPERATOR(pg_catalog.=) 0) RETURNING name, id +DEBUG: generating subplan XXX_1 for CTE update_article: UPDATE multi_router_planner.authors_reference SET name = ''::character varying WHERE (id OPERATOR(pg_catalog.=) 0) RETURNING name, id DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT COALESCE((1)::double precision, random()) AS "coalesce" DEBUG: Creating 
router plan @@ -524,7 +526,7 @@ WITH delete_article AS MATERIALIZED ( ) SELECT * FROM delete_article; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE delete_article: DELETE FROM public.articles_hash WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count +DEBUG: generating subplan XXX_1 for CTE delete_article: DELETE FROM multi_router_planner.articles_hash WHERE ((id OPERATOR(pg_catalog.=) 1) AND (word_count OPERATOR(pg_catalog.=) 10)) RETURNING id, author_id, title, word_count DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, author_id, title, word_count FROM (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) delete_article DEBUG: Creating router plan @@ -653,8 +655,8 @@ FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE tes ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM public.articles_hash -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id OPERATOR(pg_catalog.=) articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 +DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM multi_router_planner.articles_hash +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM multi_router_planner.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id OPERATOR(pg_catalog.=) articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: push down of limit count: 5 id | word_count @@ -672,8 +674,8 @@ WHERE test.id = articles_hash.id and articles_hash.author_id = 1 ORDER BY articles_hash.id; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM public.articles_hash -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id OPERATOR(pg_catalog.=) articles_hash.id) AND (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) ORDER BY articles_hash.id +DEBUG: generating subplan XXX_1 for subquery SELECT id, word_count FROM multi_router_planner.articles_hash +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT articles_hash.id, 
test.word_count FROM multi_router_planner.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id OPERATOR(pg_catalog.=) articles_hash.id) AND (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) ORDER BY articles_hash.id DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 id | word_count @@ -765,11 +767,13 @@ DEBUG: query has a single distribution column value: 10 (3 rows) -- following join is router plannable since the same worker --- has both shards +-- has both shards when citus.enable_non_colocated_router_query_pushdown +-- is enabled +SET citus.enable_non_colocated_router_query_pushdown TO ON; SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles_hash a, articles_single_shard_hash b WHERE a.author_id = 10 and a.author_id = b.author_id - LIMIT 3; + ORDER BY 1,2 LIMIT 3; DEBUG: Creating router plan DEBUG: query has a single distribution column value: 10 first_author | second_word_count @@ -779,6 +783,45 @@ DEBUG: query has a single distribution column value: 10 10 | 19519 (3 rows) +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +-- but this is not the case otherwise +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_single_shard_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + ORDER BY 1,2 LIMIT 3; +DEBUG: router planner does not support queries that reference non-colocated distributed tables +DEBUG: push down of limit count: 3 +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 8 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 8 +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning +RESET citus.enable_non_colocated_router_query_pushdown; -- following join is not router plannable since there are no -- workers containing both shards, but will work through recursive -- planning @@ -788,9 +831,9 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id = 2 and a.author_id = b.author_id LIMIT 3; DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, 
author_id, title, word_count FROM public.articles_single_shard_hash +DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM multi_router_planner.articles_single_shard_hash DEBUG: Creating router plan -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM multi_router_planner.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 first_author | second_word_count @@ -1418,6 +1461,11 @@ DEBUG: Creating router plan --------------------------------------------------------------------- (0 rows) +-- Even if the where clause contains "false", the query is not router +-- plannable when citus.enable_non_colocated_router_query_pushdown +-- is disabled. This is because the tables are not colocated.
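-- aside (not part of the expected output): the comment above hinges on the two
-- tables not being colocated; a minimal sketch to verify that from the Citus
-- metadata, assuming the articles_hash and articles_single_shard_hash tables
-- created earlier in this file:
SELECT logicalrelid, colocationid
    FROM pg_dist_partition
    WHERE logicalrelid IN ('articles_hash'::regclass,
                           'articles_single_shard_hash'::regclass)
    ORDER BY logicalrelid;
-- differing colocationid values are what make the router reject the join once
-- citus.enable_non_colocated_router_query_pushdown is turned off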
+SET citus.enable_non_colocated_router_query_pushdown TO ON; +-- the same query, router plannable SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles_hash a, articles_single_shard_hash b WHERE a.author_id = 10 and a.author_id = b.author_id and false; @@ -1426,6 +1474,17 @@ DEBUG: Creating router plan --------------------------------------------------------------------- (0 rows) +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +-- the same query, _not_ router plannable +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_single_shard_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id and false; +DEBUG: router planner does not support queries that reference non-colocated distributed tables + first_author | second_word_count +--------------------------------------------------------------------- +(0 rows) + +RESET citus.enable_non_colocated_router_query_pushdown; SELECT * FROM articles_hash WHERE null; @@ -1575,10 +1634,10 @@ SELECT 1 FROM authors_reference r JOIN ( ) num_db ON (r.id = num_db.datid) LIMIT 1; DEBUG: found no worker with all shard placements DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM public.number1() s(datid) +DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM multi_router_planner.number1() s(datid) DEBUG: Creating router plan DEBUG: generating subplan XXX_2 for subquery SELECT s.datid FROM ((SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) s LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 DEBUG: Creating router plan ?column? 
--------------------------------------------------------------------- @@ -1590,10 +1649,10 @@ SELECT s.datid FROM number1() s LEFT JOIN pg_database d ON s.datid = d.oid; SELECT 1 FROM authors_reference r JOIN num_db ON (r.id = num_db.datid) LIMIT 1; DEBUG: found no worker with all shard placements DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM public.number1() s(datid) +DEBUG: generating subplan XXX_1 for subquery SELECT datid FROM multi_router_planner.number1() s(datid) DEBUG: Creating router plan DEBUG: generating subplan XXX_2 for subquery SELECT s.datid FROM ((SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) s LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) num_db ON ((r.id OPERATOR(pg_catalog.=) num_db.datid))) LIMIT 1 DEBUG: Creating router plan ?column? --------------------------------------------------------------------- @@ -1603,8 +1662,8 @@ DEBUG: Creating router plan WITH cte AS MATERIALIZED (SELECT * FROM num_db) SELECT 1 FROM authors_reference r JOIN cte ON (r.id = cte.datid) LIMIT 1; DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE cte: SELECT datid FROM (SELECT s.datid FROM (public.number1() s(datid) LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid)))) num_db -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (public.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) cte ON ((r.id OPERATOR(pg_catalog.=) cte.datid))) LIMIT 1 +DEBUG: generating subplan XXX_1 for CTE cte: SELECT datid FROM (SELECT s.datid FROM (multi_router_planner.number1() s(datid) LEFT JOIN pg_database d ON (((s.datid)::oid OPERATOR(pg_catalog.=) d.oid)))) num_db +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 FROM (multi_router_planner.authors_reference r JOIN (SELECT intermediate_result.datid FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(datid integer)) cte ON ((r.id OPERATOR(pg_catalog.=) cte.datid))) LIMIT 1 DEBUG: Creating router plan ?column? 
--------------------------------------------------------------------- @@ -1769,7 +1828,7 @@ SET citus.log_remote_commands TO on; -- single shard select queries are router plannable SELECT * FROM articles_range where author_id = 1; DEBUG: Creating router plan -NOTICE: issuing SELECT id, author_id, title, word_count FROM public.articles_range_840012 articles_range WHERE (author_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT id, author_id, title, word_count FROM multi_router_planner.articles_range_840012 articles_range WHERE (author_id OPERATOR(pg_catalog.=) 1) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count --------------------------------------------------------------------- @@ -1777,7 +1836,7 @@ DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx SELECT * FROM articles_range where author_id = 1 or author_id = 5; DEBUG: Creating router plan -NOTICE: issuing SELECT id, author_id, title, word_count FROM public.articles_range_840012 articles_range WHERE ((author_id OPERATOR(pg_catalog.=) 1) OR (author_id OPERATOR(pg_catalog.=) 5)) +NOTICE: issuing SELECT id, author_id, title, word_count FROM multi_router_planner.articles_range_840012 articles_range WHERE ((author_id OPERATOR(pg_catalog.=) 1) OR (author_id OPERATOR(pg_catalog.=) 5)) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count --------------------------------------------------------------------- @@ -1795,7 +1854,7 @@ NOTICE: executing the command locally: SELECT id, author_id, title, word_count SELECT * FROM articles_range ar join authors_range au on (ar.author_id = au.id) WHERE ar.author_id = 1; DEBUG: Creating router plan -NOTICE: issuing SELECT ar.id, ar.author_id, ar.title, ar.word_count, au.name, au.id FROM (public.articles_range_840012 ar JOIN public.authors_range_840008 au ON ((ar.author_id OPERATOR(pg_catalog.=) au.id))) WHERE (ar.author_id OPERATOR(pg_catalog.=) 1) +NOTICE: issuing SELECT ar.id, ar.author_id, ar.title, ar.word_count, au.name, au.id FROM (multi_router_planner.articles_range_840012 ar JOIN multi_router_planner.authors_range_840008 au ON ((ar.author_id OPERATOR(pg_catalog.=) au.id))) WHERE (ar.author_id OPERATOR(pg_catalog.=) 1) DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx id | author_id | title | word_count | name | id --------------------------------------------------------------------- @@ -1898,15 +1957,54 @@ DEBUG: Creating router plan -- join between hash and range partition tables are router plannable -- only if both tables pruned down to single shard and co-located on the same -- node. 
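-- aside (not part of the expected output): a sketch of how one could inspect
-- shard placements to see whether the pruned hash and range shards can share a
-- worker; pg_dist_shard and pg_dist_shard_placement are standard Citus metadata:
SELECT s.logicalrelid, s.shardid, p.nodename, p.nodeport
    FROM pg_dist_shard s
    JOIN pg_dist_shard_placement p USING (shardid)
    WHERE s.logicalrelid IN ('articles_hash'::regclass, 'authors_range'::regclass)
    ORDER BY s.logicalrelid, s.shardid, p.nodeport;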
--- router plannable +SET citus.enable_non_colocated_router_query_pushdown TO ON; +-- router plannable when citus.enable_non_colocated_router_query_pushdown is on SELECT * FROM articles_hash ar join authors_range au on (ar.author_id = au.id) - WHERE ar.author_id = 2; + WHERE ar.author_id = 2 ORDER BY 1,2,3,4,5,6; DEBUG: Creating router plan DEBUG: query has a single distribution column value: 2 id | author_id | title | word_count | name | id --------------------------------------------------------------------- (0 rows) +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +-- not router plannable otherwise +SELECT * FROM articles_hash ar join authors_range au on (ar.author_id = au.id) + WHERE ar.author_id = 2 ORDER BY 1,2,3,4,5,6; +DEBUG: router planner does not support queries that reference non-colocated distributed tables +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 5 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 10 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 15 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 8 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 20 + id | author_id | title | word_count | name | id +--------------------------------------------------------------------- +(0 rows) + +RESET citus.enable_non_colocated_router_query_pushdown; -- not router plannable SELECT * FROM articles_hash ar join authors_range au on (ar.author_id = au.id) WHERE ar.author_id = 3; @@ -2433,12 +2531,15 @@ SELECT create_distributed_table('failure_test', 'a', 'hash'); SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - router_user - :master_port +SET search_path TO multi_router_planner; -- we will fail to connect to worker 2, since the user does not exist -- still, we never mark placements inactive. 
Instead, fail the transaction BEGIN; @@ -2452,7 +2553,7 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'failure_test'::regclass ) - ORDER BY placementid; + ORDER BY shardid, nodeport; shardid | shardstate | nodename | nodeport --------------------------------------------------------------------- 840017 | 1 | localhost | 57637 @@ -2471,18 +2572,55 @@ DROP USER router_user; \c - - - :master_port DROP OWNED BY router_user; DROP USER router_user; -DROP TABLE failure_test; -DROP FUNCTION author_articles_max_id(); -DROP FUNCTION author_articles_id_word_count(); -DROP MATERIALIZED VIEW mv_articles_hash_empty; -DROP MATERIALIZED VIEW mv_articles_hash_data; -DROP VIEW num_db; -DROP FUNCTION number1(); -DROP TABLE articles_hash; -DROP TABLE articles_single_shard_hash; -DROP TABLE authors_hash; -DROP TABLE authors_range; -DROP TABLE authors_reference; -DROP TABLE company_employees; -DROP TABLE articles_range; -DROP TABLE articles_append; +SET search_path TO multi_router_planner; +SET citus.next_shard_id TO 850000; +SET citus.shard_replication_factor TO 1; +CREATE TABLE single_shard_dist(a int, b int); +SELECT create_distributed_table('single_shard_dist', 'a', shard_count=>1); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_replication_factor TO 2; +CREATE TABLE table_with_four_shards(a int, b int); +SELECT create_distributed_table('table_with_four_shards', 'a', shard_count=>4); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SET client_min_messages TO DEBUG2; +-- Make sure that router rejects planning this query because +-- the target shards are not placed on the same node when +-- citus.enable_non_colocated_router_query_pushdown is disabled. +-- Otherwise, it throws a somewhat meaningless error but we assume +-- that the user is aware of the setting. 
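-- aside (not part of the expected output): a sketch of why the pushdown path
-- fails below, assuming get_shard_id_for_distribution_column() behaves as
-- documented: locate the shard of table_with_four_shards for a = 1 and the
-- only shard of single_shard_dist, then compare their placements.
SELECT p.shardid, p.nodename, p.nodeport
    FROM pg_dist_shard_placement p
    WHERE p.shardid IN (
            get_shard_id_for_distribution_column('table_with_four_shards', 1),
            get_shard_id_for_distribution_column('single_shard_dist', 1))
    ORDER BY p.shardid, p.nodeport;
-- if the two shards live on different workers, routing the whole statement to
-- one node cannot succeed, hence the "relation ... does not exist" error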
+SET citus.enable_non_colocated_router_query_pushdown TO ON; +WITH cte AS ( + DELETE FROM table_with_four_shards WHERE a = 1 RETURNING * +) +SELECT * FROM single_shard_dist WHERE b IN (SELECT b FROM cte); +DEBUG: Creating router plan +DEBUG: query has a single distribution column value: 1 +ERROR: relation "multi_router_planner.single_shard_dist_850000" does not exist +CONTEXT: while executing command on localhost:xxxxx +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +WITH cte AS ( + DELETE FROM table_with_four_shards WHERE a = 1 RETURNING * +) +SELECT * FROM single_shard_dist WHERE b IN (SELECT b FROM cte); +DEBUG: router planner does not support queries that reference non-colocated distributed tables +DEBUG: generating subplan XXX_1 for CTE cte: DELETE FROM multi_router_planner.table_with_four_shards WHERE (a OPERATOR(pg_catalog.=) 1) RETURNING a, b +DEBUG: Distributed planning for a fast-path router query +DEBUG: Creating router plan +DEBUG: query has a single distribution column value: 1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, b FROM multi_router_planner.single_shard_dist WHERE (b OPERATOR(pg_catalog.=) ANY (SELECT cte.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte)) +DEBUG: Creating router plan + a | b +--------------------------------------------------------------------- +(0 rows) + +RESET citus.enable_non_colocated_router_query_pushdown; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_router_planner CASCADE; diff --git a/src/test/regress/expected/multi_shard_update_delete.out b/src/test/regress/expected/multi_shard_update_delete.out index 016801d26..af8ddfb2d 100644 --- a/src/test/regress/expected/multi_shard_update_delete.out +++ b/src/test/regress/expected/multi_shard_update_delete.out @@ -674,7 +674,7 @@ UPDATE users_test_table SET value_2 = 5 FROM events_test_table WHERE users_test_table.user_id = events_test_table.user_id * random(); -ERROR: functions used in the WHERE clause of modification queries on distributed tables must not be VOLATILE +ERROR: functions used in the WHERE/ON/WHEN clause of modification queries on distributed tables must not be VOLATILE UPDATE users_test_table SET value_2 = 5 * random() FROM events_test_table diff --git a/src/test/regress/expected/multi_simple_queries.out b/src/test/regress/expected/multi_simple_queries.out index 6bd8bad4a..646c42599 100644 --- a/src/test/regress/expected/multi_simple_queries.out +++ b/src/test/regress/expected/multi_simple_queries.out @@ -7,6 +7,8 @@ SET citus.coordinator_aggregation_strategy TO 'disabled'; -- =================================================================== -- test end-to-end query functionality -- =================================================================== +CREATE SCHEMA simple_queries_test; +SET search_path TO simple_queries_test; CREATE TABLE articles ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -382,7 +384,7 @@ SELECT author_id FROM articles 8 (3 rows) -SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders +SELECT o_orderstatus, count(*), avg(o_totalprice) FROM public.orders GROUP BY o_orderstatus HAVING count(*) > 1450 OR avg(o_totalprice) > 150000 ORDER BY o_orderstatus; @@ -392,7 +394,7 @@ SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders P | 75 | 164847.914533333333 (2 rows) -SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders +SELECT o_orderstatus, 
sum(l_linenumber), avg(l_linenumber) FROM public.lineitem, public.orders WHERE l_orderkey = o_orderkey AND l_orderkey > 9030 GROUP BY o_orderstatus HAVING sum(l_linenumber) > 1000 @@ -479,12 +481,13 @@ DEBUG: query has a single distribution column value: 10 10 | 6363 (3 rows) --- now show that JOINs with multiple tables are not router executable --- they are executed by real-time executor +-- Not router plannable when citus.enable_non_colocated_router_query_pushdown +-- is disabled. +SET citus.enable_non_colocated_router_query_pushdown TO ON; SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles a, articles_single_shard b WHERE a.author_id = 10 and a.author_id = b.author_id - LIMIT 3; + ORDER BY 1,2 LIMIT 3; DEBUG: Creating router plan DEBUG: query has a single distribution column value: 10 first_author | second_word_count @@ -494,6 +497,88 @@ DEBUG: query has a single distribution column value: 10 10 | 19519 (3 rows) +SET citus.enable_non_colocated_router_query_pushdown TO OFF; +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles a, articles_single_shard b + WHERE a.author_id = 10 and a.author_id = b.author_id + ORDER BY 1,2 LIMIT 3; +DEBUG: router planner does not support queries that reference non-colocated distributed tables +DEBUG: push down of limit count: 3 +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 8 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 8 +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning +-- but they can be executed via repartition join planner +SET citus.enable_repartition_joins TO ON; +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles a, articles_single_shard b + WHERE a.author_id = 10 and a.author_id = b.author_id + ORDER BY 1,2 LIMIT 3; +DEBUG: router planner does not support queries that reference non-colocated distributed tables +DEBUG: push down of limit count: 3 +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 
+DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 2 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 4 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 6 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 8 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 8 + first_author | second_word_count +--------------------------------------------------------------------- + 10 | 19519 + 10 | 19519 + 10 | 19519 +(3 rows) + +RESET citus.enable_repartition_joins; +RESET citus.enable_non_colocated_router_query_pushdown; -- do not create the master query for LIMIT on a single shard SELECT SELECT * FROM articles @@ -541,7 +626,7 @@ DEBUG: query has a single distribution column value: 2 -- error out on unsupported aggregate SET client_min_messages to 'NOTICE'; -CREATE AGGREGATE public.invalid(int) ( +CREATE AGGREGATE invalid(int) ( sfunc = int4pl, stype = int ); @@ -812,10 +897,11 @@ SELECT * FROM (SELECT nextval('query_seq') FROM articles LIMIT 3) vals; (3 rows) -- but not elsewhere -SELECT sum(nextval('query_seq')) FROM articles; -ERROR: relation "public.query_seq" does not exist +SELECT sum(nextval('simple_queries_test.query_seq')) FROM articles; +ERROR: relation "simple_queries_test.query_seq" does not exist CONTEXT: while executing command on localhost:xxxxx -SELECT n FROM (SELECT nextval('query_seq') n, random() FROM articles) vals; -ERROR: relation "public.query_seq" does not exist +SELECT n FROM (SELECT nextval('simple_queries_test.query_seq') n, random() FROM articles) vals; +ERROR: relation "simple_queries_test.query_seq" does not exist CONTEXT: while executing command on localhost:xxxxx -DROP SEQUENCE query_seq; +SET client_min_messages TO WARNING; +DROP SCHEMA simple_queries_test CASCADE; diff --git a/src/test/regress/expected/multi_upsert.out b/src/test/regress/expected/multi_upsert.out index 08308aba0..e41b2a3d5 100644 --- a/src/test/regress/expected/multi_upsert.out +++ b/src/test/regress/expected/multi_upsert.out @@ -1,5 +1,7 @@ -- this test file aims to test UPSERT feature on Citus SET citus.next_shard_id TO 980000; +CREATE SCHEMA upsert_test; +SET search_path TO upsert_test; CREATE TABLE upsert_test ( part_key int UNIQUE, @@ -244,3 +246,5 @@ ERROR: functions used in the WHERE clause of the ON CONFLICT clause of INSERTs INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_key) DO UPDATE SET part_key = 15; ERROR: modifying the partition value of rows is not allowed +SET client_min_messages TO WARNING; +DROP SCHEMA upsert_test CASCADE; diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 7a41b25ec..7fc102dbb 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -315,7 +315,7 @@ SELECT create_reference_table('tbl2'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on 
distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- now, both are reference, still not supported SELECT create_reference_table('tbl1'); create_reference_table @@ -325,7 +325,7 @@ SELECT create_reference_table('tbl1'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is not supported on reference tables yet -- now, both distributed, not works SELECT undistribute_table('tbl1'); NOTICE: creating a new table for pg15.tbl1 @@ -419,29 +419,36 @@ SELECT create_distributed_table('tbl2', 'x'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns -- also, not inside subqueries & ctes WITH targq AS ( SELECT * FROM tbl2 ) MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; -ERROR: MERGE command is not supported on distributed/reference tables yet --- crashes on beta3, fixed on 15 stable ---WITH foo AS ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) SELECT * FROM foo; ---COPY ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) TO stdout; +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns +WITH foo AS ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) SELECT * FROM foo; +ERROR: MERGE not supported in WITH query +COPY ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) TO stdout; +ERROR: MERGE not supported in COPY +MERGE INTO tbl1 t +USING tbl2 +ON (true) +WHEN MATCHED THEN + DO NOTHING; +ERROR: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns MERGE INTO tbl1 t USING tbl2 ON (true) WHEN MATCHED THEN UPDATE SET x = (SELECT count(*) FROM tbl2); -ERROR: MERGE command is not supported on distributed/reference tables yet +ERROR: updating the distribution column is not allowed in MERGE actions -- test numeric types with negative scale CREATE TABLE numeric_negative_scale(numeric_column numeric(3,-1), orig_value int); INSERT into numeric_negative_scale SELECT x,x FROM generate_series(111, 115) x; diff --git a/src/test/regress/expected/pgmerge.out b/src/test/regress/expected/pgmerge.out index b90760691..6bdb7f771 100644 --- a/src/test/regress/expected/pgmerge.out +++ b/src/test/regress/expected/pgmerge.out @@ -910,7 +910,15 @@ MERGE INTO wq_target t USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; -ERROR: functions used in UPDATE queries on distributed tables must not be VOLATILE +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables +ROLLBACK; +-- Test preventing ON condition from writing to the database +BEGIN; +MERGE INTO wq_target t +USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET balance = t.balance + s.balance; +ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; @@ -1885,13 +1893,16 @@ INSERT INTO pa_target SELECT '2017-02-28', id, id * 100, 'initial' FROM generate SET client_min_messages TO DEBUG1; BEGIN; MERGE INTO pa_target t - 
USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s + USING (SELECT * FROM pa_source WHERE sid < 10) s + --USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s ON t.tid = s.sid WHEN MATCHED THEN UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT MATCHED THEN - INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); -DEBUG: + INSERT VALUES ('2017-01-15', sid, delta, 'inserted by merge'); +DEBUG: Creating MERGE router plan +DEBUG: + --INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); SELECT * FROM pa_target ORDER BY tid; logts | tid | balance | val --------------------------------------------------------------------- @@ -2083,7 +2094,8 @@ WHEN MATCHED THEN UPDATE WHEN NOT MATCHED THEN INSERT (city_id, logdate, peaktemp, unitsales) VALUES (city_id, logdate, peaktemp, unitsales); -DEBUG: +DEBUG: Creating MERGE router plan +DEBUG: RESET client_min_messages; SELECT tableoid::regclass, * FROM measurement ORDER BY city_id, logdate; tableoid | city_id | logdate | peaktemp | unitsales diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out new file mode 100644 index 000000000..a267cbe71 --- /dev/null +++ b/src/test/regress/expected/publication.out @@ -0,0 +1,379 @@ +CREATE SCHEMA publication; +CREATE SCHEMA "publication-1"; +SET search_path TO publication; +SET citus.shard_replication_factor TO 1; +-- for citus_add_local_table_to_metadata / create_distributed_table_concurrently +SELECT citus_set_coordinator_host('localhost', :master_port); + citus_set_coordinator_host +--------------------------------------------------------------------- + +(1 row) + +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; +\c - - - :worker_1_port +SET citus.enable_ddl_propagation TO off; +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; +\c - - - :worker_2_port +SET citus.enable_ddl_propagation TO off; +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; +-- create some publications with conflicting names on worker node +-- publication will be different from coordinator +CREATE PUBLICATION "pub-all"; +-- publication will be same as coordinator +CREATE PUBLICATION "pub-all-insertupdateonly" FOR ALL TABLES WITH (publish = 'insert, update');; +\c - - - :master_port +SET search_path TO publication; +SET citus.shard_replication_factor TO 1; +-- do not create publications on worker 2 initially +SELECT citus_remove_node('localhost', :worker_2_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +-- create a non-distributed publication +SET citus.enable_ddl_propagation TO off; +CREATE PUBLICATION pubnotdistributed WITH (publish = 'delete'); +RESET citus.enable_ddl_propagation; +ALTER PUBLICATION pubnotdistributed SET (publish = 'truncate'); +-- create regular, distributed publications +CREATE PUBLICATION pubempty; +CREATE PUBLICATION pubinsertonly WITH (publish = 'insert'); +CREATE PUBLICATION "pub-all" FOR ALL TABLES; +CREATE PUBLICATION "pub-all-insertupdateonly" FOR ALL TABLES WITH (publish = 'insert, update'); 
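-- aside (not part of the expected output): a quick spot check that complements
-- activate_node_snapshot(), listing the publications each worker currently has
-- via the standard pg_publication catalog:
SELECT nodename, nodeport, result
    FROM run_command_on_workers($$
        SELECT string_agg(pubname, ', ' ORDER BY pubname) FROM pg_publication
    $$)
    ORDER BY nodename, nodeport;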
+-- add worker 2 with publications +SELECT 1 FROM citus_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- Check publications on all the nodes, if we see the same publication name twice then its definition differs +-- Note that publications are special in the sense that the coordinator object might differ from +-- worker objects due to the presence of regular tables. +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION "pub-all" FOR ALL TABLES WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); + SELECT worker_create_or_replace_object('CREATE PUBLICATION "pub-all-insertupdateonly" FOR ALL TABLES WITH (publish_via_partition_root = ''false'', publish = ''insert, update'')'); + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubempty WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubinsertonly WITH (publish_via_partition_root = ''false'', publish = ''insert'')'); +(4 rows) + +CREATE TABLE test (x int primary key, y int, "column-1" int, doc xml); +CREATE TABLE "test-pubs" (x int primary key, y int, "column-1" int); +CREATE TABLE "publication-1"."test-pubs" (x int primary key, y int, "column-1" int); +-- various operations on a publication with only local tables +CREATE PUBLICATION pubtables_orig FOR TABLE test, "test-pubs", "publication-1"."test-pubs" WITH (publish = 'insert, truncate'); +ALTER PUBLICATION pubtables_orig DROP TABLE test; +ALTER PUBLICATION pubtables_orig ADD TABLE test; +-- publication will be empty on worker nodes, since all tables are local +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubtables_orig WITH (publish_via_partition_root = ''false'', publish = ''insert, truncate'')'); +(1 row) + +-- distribute a table, creating a mixed publication +SELECT create_distributed_table('test','x', colocate_with := 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- some generic operations +ALTER PUBLICATION pubtables_orig RENAME TO pubtables; +ALTER PUBLICATION pubtables SET (publish = 'insert, update, delete'); +ALTER PUBLICATION pubtables OWNER TO postgres; +ALTER PUBLICATION pubtables SET (publish = 'inert, update, delete'); +ERROR: unrecognized value for publication option "publish": "inert" +ALTER PUBLICATION pubtables ADD TABLE notexist; +ERROR: relation "notexist" does not exist +-- operations with a distributed table +ALTER PUBLICATION pubtables DROP TABLE test; +ALTER PUBLICATION pubtables ADD TABLE test; +ALTER PUBLICATION pubtables SET TABLE test, "test-pubs", "publication-1"."test-pubs"; +-- operations with a local table in a mixed publication +ALTER PUBLICATION pubtables DROP TABLE 
"test-pubs"; +ALTER PUBLICATION pubtables ADD TABLE "test-pubs"; +SELECT create_distributed_table('"test-pubs"', 'x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- test and test-pubs will show up in worker nodes +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubtables FOR TABLE publication.test, TABLE publication."test-pubs" WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete'')'); +(1 row) + +-- operations with a strangely named distributed table in a mixed publication +ALTER PUBLICATION pubtables DROP TABLE "test-pubs"; +ALTER PUBLICATION pubtables ADD TABLE "test-pubs"; +-- create a publication with distributed and local tables +DROP PUBLICATION pubtables; +CREATE PUBLICATION pubtables FOR TABLE test, "test-pubs", "publication-1"."test-pubs"; +-- change distributed tables +SELECT alter_distributed_table('test', shard_count := 5, cascade_to_colocated := true); +NOTICE: creating a new table for publication.test +NOTICE: moving the data of publication.test +NOTICE: dropping the old publication.test +NOTICE: renaming the new table to publication.test +NOTICE: creating a new table for publication."test-pubs" +NOTICE: moving the data of publication."test-pubs" +NOTICE: dropping the old publication."test-pubs" +NOTICE: renaming the new table to publication."test-pubs" + alter_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('test'); +NOTICE: creating a new table for publication.test +NOTICE: moving the data of publication.test +NOTICE: dropping the old publication.test +NOTICE: renaming the new table to publication.test + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_local_table_to_metadata('test'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table_concurrently('test', 'x'); + create_distributed_table_concurrently +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('"test-pubs"'); +NOTICE: creating a new table for publication."test-pubs" +NOTICE: moving the data of publication."test-pubs" +NOTICE: dropping the old publication."test-pubs" +NOTICE: renaming the new table to publication."test-pubs" + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('"test-pubs"'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- publications are unchanged despite various tranformations +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubtables FOR TABLE 
publication.test, TABLE publication."test-pubs" WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); +(1 row) + +-- partitioned table +CREATE TABLE testpub_partitioned (a int, b text, c text) PARTITION BY RANGE (a); +CREATE TABLE testpub_partitioned_0 PARTITION OF testpub_partitioned FOR VALUES FROM (1) TO (10); +ALTER TABLE testpub_partitioned_0 ADD PRIMARY KEY (a); +ALTER TABLE testpub_partitioned_0 REPLICA IDENTITY USING INDEX testpub_partitioned_0_pkey; +CREATE TABLE testpub_partitioned_1 PARTITION OF testpub_partitioned FOR VALUES FROM (11) TO (20); +ALTER TABLE testpub_partitioned_1 ADD PRIMARY KEY (a); +ALTER TABLE testpub_partitioned_1 REPLICA IDENTITY USING INDEX testpub_partitioned_1_pkey; +CREATE PUBLICATION pubpartitioned FOR TABLE testpub_partitioned WITH (publish_via_partition_root = 'true'); +SELECT create_distributed_table('testpub_partitioned', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubpartitioned%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubpartitioned FOR TABLE publication.testpub_partitioned WITH (publish_via_partition_root = ''true'', publish = ''insert, update, delete, truncate'')'); +(1 row) + +DROP PUBLICATION pubpartitioned; +CREATE PUBLICATION pubpartitioned FOR TABLE testpub_partitioned WITH (publish_via_partition_root = 'true'); +-- add a partition +ALTER PUBLICATION pubpartitioned ADD TABLE testpub_partitioned_1; +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLIATION%' AND c LIKE '%pubpartitioned%' ORDER BY 1) s$$) + ORDER BY c) s; +ERROR: malformed array literal: "" +DETAIL: Array value must start with "{" or dimension information. 
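-- aside (not part of the expected output): the error above appears to come from
-- the '%CREATE PUBLIATION%' typo in the LIKE filter; nothing matches, array_agg()
-- returns NULL, run_command_on_workers() reports that as an empty string, and the
-- outer result::text[] cast then fails. For illustration only, the corrected
-- inner query would read:
SELECT array_agg(c)
    FROM (SELECT c
              FROM unnest(activate_node_snapshot()) c
              WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubpartitioned%'
              ORDER BY 1) s;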
+-- make sure we can sync all the publication metadata +SELECT start_metadata_sync_to_all_nodes(); + start_metadata_sync_to_all_nodes +--------------------------------------------------------------------- + t +(1 row) + +DROP PUBLICATION pubempty; +DROP PUBLICATION pubtables; +DROP PUBLICATION pubinsertonly; +DROP PUBLICATION "pub-all-insertupdateonly"; +DROP PUBLICATION "pub-all"; +DROP PUBLICATION pubpartitioned; +DROP PUBLICATION pubnotdistributed; +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +SET client_min_messages TO ERROR; +DROP SCHEMA publication CASCADE; +DROP SCHEMA "publication-1" CASCADE; +SELECT citus_remove_node('localhost', :master_port); +\q +\endif +-- recreate a mixed publication +CREATE PUBLICATION pubtables FOR TABLE test, "publication-1"."test-pubs"; +-- operations on an existing distributed table +ALTER PUBLICATION pubtables DROP TABLE test; +ALTER PUBLICATION pubtables ADD TABLE test (y); +ALTER PUBLICATION pubtables SET TABLE test WHERE (doc IS DOCUMENT); +ALTER PUBLICATION pubtables SET TABLE test WHERE (xmlexists('//foo[text() = ''bar'']' PASSING BY VALUE doc)); +ALTER PUBLICATION pubtables SET TABLE test WHERE (CASE x WHEN 5 THEN true ELSE false END); +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubtables FOR TABLE publication.test WHERE (CASE test.x WHEN 5 THEN true ELSE false END) WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); +(1 row) + +ALTER PUBLICATION pubtables SET TABLE test ("column-1", x) WHERE (x > "column-1"), "publication-1"."test-pubs"; +-- operations on a local table +ALTER PUBLICATION pubtables DROP TABLE "publication-1"."test-pubs"; +ALTER PUBLICATION pubtables ADD TABLE "publication-1"."test-pubs" (y); +-- mixed operations +ALTER PUBLICATION pubtables SET TABLE test, TABLES IN SCHEMA "publication-1", TABLES IN SCHEMA current_schema; +ALTER PUBLICATION pubtables SET TABLE "publication-1"."test-pubs", test ("column-1", x) WHERE (x > "column-1"); +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubtables FOR TABLE publication.test (x, "column-1") WHERE ((test.x > test."column-1")) WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); +(1 row) + +-- publication with schemas +CREATE PUBLICATION "pub-mix" FOR TABLE test, TABLES IN SCHEMA current_schema, TABLE "publication-1"."test-pubs", TABLES IN SCHEMA "publication-1"; +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pub-mix%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT 
worker_create_or_replace_object('CREATE PUBLICATION "pub-mix" FOR TABLES IN SCHEMA publication, TABLES IN SCHEMA "publication-1", TABLE publication.test WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); +(1 row) + +-- publication on a partitioned table +CREATE PUBLICATION pubpartitioned FOR TABLE testpub_partitioned (a, b) WITH (publish_via_partition_root = 'true'); +ALTER PUBLICATION pubpartitioned SET (publish_via_partition_root = 1); +SELECT alter_distributed_table('testpub_partitioned', shard_count := 6, cascade_to_colocated := true); +NOTICE: converting the partitions of publication.testpub_partitioned +NOTICE: creating a new table for publication.testpub_partitioned_0 +NOTICE: moving the data of publication.testpub_partitioned_0 +NOTICE: dropping the old publication.testpub_partitioned_0 +NOTICE: renaming the new table to publication.testpub_partitioned_0 +NOTICE: creating a new table for publication.testpub_partitioned_1 +NOTICE: moving the data of publication.testpub_partitioned_1 +NOTICE: dropping the old publication.testpub_partitioned_1 +NOTICE: renaming the new table to publication.testpub_partitioned_1 +NOTICE: creating a new table for publication.testpub_partitioned +NOTICE: dropping the old publication.testpub_partitioned +NOTICE: renaming the new table to publication.testpub_partitioned +NOTICE: creating a new table for publication.test +NOTICE: moving the data of publication.test +NOTICE: dropping the old publication.test +NOTICE: renaming the new table to publication.test + alter_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubpartitioned%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubpartitioned FOR TABLE publication.testpub_partitioned (a, b) WITH (publish_via_partition_root = ''true'', publish = ''insert, update, delete, truncate'')'); +(1 row) + +-- make sure we propagate schema dependencies +SET citus.create_object_propagation TO 'deferred'; +BEGIN; +CREATE SCHEMA deptest; +END; +CREATE PUBLICATION pubdep FOR TABLES IN SCHEMA deptest; +RESET citus.create_object_propagation; +DROP SCHEMA deptest CASCADE; +-- make sure we can sync all the publication metadata +SELECT start_metadata_sync_to_all_nodes(); + start_metadata_sync_to_all_nodes +--------------------------------------------------------------------- + t +(1 row) + +DROP PUBLICATION pubdep; +DROP PUBLICATION "pub-mix"; +DROP PUBLICATION pubtables; +DROP PUBLICATION pubpartitioned; +SET client_min_messages TO ERROR; +DROP SCHEMA publication CASCADE; +DROP SCHEMA "publication-1" CASCADE; +SELECT citus_remove_node('localhost', :master_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/publication_0.out b/src/test/regress/expected/publication_0.out new file mode 100644 index 000000000..617950a76 --- /dev/null +++ b/src/test/regress/expected/publication_0.out @@ -0,0 +1,273 @@ +CREATE SCHEMA publication; +CREATE SCHEMA "publication-1"; +SET search_path TO publication; +SET citus.shard_replication_factor TO 1; +-- for citus_add_local_table_to_metadata / 
create_distributed_table_concurrently +SELECT citus_set_coordinator_host('localhost', :master_port); + citus_set_coordinator_host +--------------------------------------------------------------------- + +(1 row) + +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; +\c - - - :worker_1_port +SET citus.enable_ddl_propagation TO off; +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; +\c - - - :worker_2_port +SET citus.enable_ddl_propagation TO off; +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; +-- create some publications with conflicting names on worker node +-- publication will be different from coordinator +CREATE PUBLICATION "pub-all"; +-- publication will be same as coordinator +CREATE PUBLICATION "pub-all-insertupdateonly" FOR ALL TABLES WITH (publish = 'insert, update');; +\c - - - :master_port +SET search_path TO publication; +SET citus.shard_replication_factor TO 1; +-- do not create publications on worker 2 initially +SELECT citus_remove_node('localhost', :worker_2_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +-- create a non-distributed publication +SET citus.enable_ddl_propagation TO off; +CREATE PUBLICATION pubnotdistributed WITH (publish = 'delete'); +RESET citus.enable_ddl_propagation; +ALTER PUBLICATION pubnotdistributed SET (publish = 'truncate'); +-- create regular, distributed publications +CREATE PUBLICATION pubempty; +CREATE PUBLICATION pubinsertonly WITH (publish = 'insert'); +CREATE PUBLICATION "pub-all" FOR ALL TABLES; +CREATE PUBLICATION "pub-all-insertupdateonly" FOR ALL TABLES WITH (publish = 'insert, update'); +-- add worker 2 with publications +SELECT 1 FROM citus_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- Check publications on all the nodes, if we see the same publication name twice then its definition differs +-- Note that publications are special in the sense that the coordinator object might differ from +-- worker objects due to the presence of regular tables. 
+SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION "pub-all" FOR ALL TABLES WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); + SELECT worker_create_or_replace_object('CREATE PUBLICATION "pub-all-insertupdateonly" FOR ALL TABLES WITH (publish_via_partition_root = ''false'', publish = ''insert, update'')'); + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubempty WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubinsertonly WITH (publish_via_partition_root = ''false'', publish = ''insert'')'); +(4 rows) + +CREATE TABLE test (x int primary key, y int, "column-1" int, doc xml); +CREATE TABLE "test-pubs" (x int primary key, y int, "column-1" int); +CREATE TABLE "publication-1"."test-pubs" (x int primary key, y int, "column-1" int); +-- various operations on a publication with only local tables +CREATE PUBLICATION pubtables_orig FOR TABLE test, "test-pubs", "publication-1"."test-pubs" WITH (publish = 'insert, truncate'); +ALTER PUBLICATION pubtables_orig DROP TABLE test; +ALTER PUBLICATION pubtables_orig ADD TABLE test; +-- publication will be empty on worker nodes, since all tables are local +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubtables_orig WITH (publish_via_partition_root = ''false'', publish = ''insert, truncate'')'); +(1 row) + +-- distribute a table, creating a mixed publication +SELECT create_distributed_table('test','x', colocate_with := 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- some generic operations +ALTER PUBLICATION pubtables_orig RENAME TO pubtables; +ALTER PUBLICATION pubtables SET (publish = 'insert, update, delete'); +ALTER PUBLICATION pubtables OWNER TO postgres; +ALTER PUBLICATION pubtables SET (publish = 'inert, update, delete'); +ERROR: unrecognized "publish" value: "inert" +ALTER PUBLICATION pubtables ADD TABLE notexist; +ERROR: relation "notexist" does not exist +-- operations with a distributed table +ALTER PUBLICATION pubtables DROP TABLE test; +ALTER PUBLICATION pubtables ADD TABLE test; +ALTER PUBLICATION pubtables SET TABLE test, "test-pubs", "publication-1"."test-pubs"; +-- operations with a local table in a mixed publication +ALTER PUBLICATION pubtables DROP TABLE "test-pubs"; +ALTER PUBLICATION pubtables ADD TABLE "test-pubs"; +SELECT create_distributed_table('"test-pubs"', 'x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- test and test-pubs will show up in worker nodes +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE 
PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubtables FOR TABLE publication.test, publication."test-pubs" WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete'')'); +(1 row) + +-- operations with a strangely named distributed table in a mixed publication +ALTER PUBLICATION pubtables DROP TABLE "test-pubs"; +ALTER PUBLICATION pubtables ADD TABLE "test-pubs"; +-- create a publication with distributed and local tables +DROP PUBLICATION pubtables; +CREATE PUBLICATION pubtables FOR TABLE test, "test-pubs", "publication-1"."test-pubs"; +-- change distributed tables +SELECT alter_distributed_table('test', shard_count := 5, cascade_to_colocated := true); +NOTICE: creating a new table for publication.test +NOTICE: moving the data of publication.test +NOTICE: dropping the old publication.test +NOTICE: renaming the new table to publication.test +NOTICE: creating a new table for publication."test-pubs" +NOTICE: moving the data of publication."test-pubs" +NOTICE: dropping the old publication."test-pubs" +NOTICE: renaming the new table to publication."test-pubs" + alter_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('test'); +NOTICE: creating a new table for publication.test +NOTICE: moving the data of publication.test +NOTICE: dropping the old publication.test +NOTICE: renaming the new table to publication.test + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_local_table_to_metadata('test'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table_concurrently('test', 'x'); + create_distributed_table_concurrently +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('"test-pubs"'); +NOTICE: creating a new table for publication."test-pubs" +NOTICE: moving the data of publication."test-pubs" +NOTICE: dropping the old publication."test-pubs" +NOTICE: renaming the new table to publication."test-pubs" + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('"test-pubs"'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- publications are unchanged despite various tranformations +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubtables FOR TABLE publication.test, publication."test-pubs" WITH (publish_via_partition_root = ''false'', publish = ''insert, update, delete, truncate'')'); +(1 row) + +-- partitioned table +CREATE TABLE testpub_partitioned (a int, b text, c text) PARTITION BY RANGE (a); +CREATE TABLE testpub_partitioned_0 PARTITION OF testpub_partitioned FOR VALUES FROM (1) TO (10); +ALTER TABLE testpub_partitioned_0 ADD PRIMARY KEY (a); +ALTER TABLE testpub_partitioned_0 REPLICA IDENTITY USING INDEX 
testpub_partitioned_0_pkey; +CREATE TABLE testpub_partitioned_1 PARTITION OF testpub_partitioned FOR VALUES FROM (11) TO (20); +ALTER TABLE testpub_partitioned_1 ADD PRIMARY KEY (a); +ALTER TABLE testpub_partitioned_1 REPLICA IDENTITY USING INDEX testpub_partitioned_1_pkey; +CREATE PUBLICATION pubpartitioned FOR TABLE testpub_partitioned WITH (publish_via_partition_root = 'true'); +SELECT create_distributed_table('testpub_partitioned', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubpartitioned%' ORDER BY 1) s$$) + ORDER BY c) s; + c +--------------------------------------------------------------------- + SELECT worker_create_or_replace_object('CREATE PUBLICATION pubpartitioned FOR TABLE publication.testpub_partitioned WITH (publish_via_partition_root = ''true'', publish = ''insert, update, delete, truncate'')'); +(1 row) + +DROP PUBLICATION pubpartitioned; +CREATE PUBLICATION pubpartitioned FOR TABLE testpub_partitioned WITH (publish_via_partition_root = 'true'); +-- add a partition +ALTER PUBLICATION pubpartitioned ADD TABLE testpub_partitioned_1; +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLIATION%' AND c LIKE '%pubpartitioned%' ORDER BY 1) s$$) + ORDER BY c) s; +ERROR: malformed array literal: "" +DETAIL: Array value must start with "{" or dimension information. +-- make sure we can sync all the publication metadata +SELECT start_metadata_sync_to_all_nodes(); + start_metadata_sync_to_all_nodes +--------------------------------------------------------------------- + t +(1 row) + +DROP PUBLICATION pubempty; +DROP PUBLICATION pubtables; +DROP PUBLICATION pubinsertonly; +DROP PUBLICATION "pub-all-insertupdateonly"; +DROP PUBLICATION "pub-all"; +DROP PUBLICATION pubpartitioned; +DROP PUBLICATION pubnotdistributed; +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +SET client_min_messages TO ERROR; +DROP SCHEMA publication CASCADE; +DROP SCHEMA "publication-1" CASCADE; +SELECT citus_remove_node('localhost', :master_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +\q diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index 9eec2cee3..1dea3b442 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -1482,7 +1482,6 @@ SELECT * from master_drain_node('localhost', :worker_2_port); ERROR: cannot use logical replication to transfer shards of the relation colocated_rebalance_test since it doesn't have a REPLICA IDENTITY or PRIMARY KEY DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. 
-CONTEXT: while executing command on localhost:xxxxx -- Make sure shouldhaveshards is false select shouldhaveshards from pg_dist_node where nodeport = :worker_2_port; shouldhaveshards @@ -2626,6 +2625,127 @@ RESET citus.shard_count; DROP VIEW table_placements_per_node; DELETE FROM pg_catalog.pg_dist_rebalance_strategy WHERE name='capacity_high_worker_2'; DELETE FROM pg_catalog.pg_dist_rebalance_strategy WHERE name='only_worker_1'; +-- add colocation groups with shard group count < worker count +-- the rebalancer should balance those "unbalanced shards" evenly as much as possible +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +create table single_shard_colocation_1a (a int primary key); +create table single_shard_colocation_1b (a int primary key); +create table single_shard_colocation_1c (a int primary key); +SET citus.shard_replication_factor = 1; +select create_distributed_table('single_shard_colocation_1a','a', colocate_with => 'none', shard_count => 1); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +select create_distributed_table('single_shard_colocation_1b','a',colocate_with=>'single_shard_colocation_1a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +select create_distributed_table('single_shard_colocation_1c','a',colocate_with=>'single_shard_colocation_1b'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +create table single_shard_colocation_2a (a bigint); +create table single_shard_colocation_2b (a bigint); +select create_distributed_table('single_shard_colocation_2a','a', colocate_with => 'none', shard_count => 1); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +select create_distributed_table('single_shard_colocation_2b','a',colocate_with=>'single_shard_colocation_2a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- all shards are placed on the first worker node +SELECT sh.logicalrelid, pl.nodeport + FROM pg_dist_shard sh JOIN pg_dist_shard_placement pl ON sh.shardid = pl.shardid + WHERE sh.logicalrelid::text IN ('single_shard_colocation_1a', 'single_shard_colocation_1b', 'single_shard_colocation_1c', 'single_shard_colocation_2a', 'single_shard_colocation_2b') + ORDER BY sh.logicalrelid; + logicalrelid | nodeport +--------------------------------------------------------------------- + single_shard_colocation_1a | 57637 + single_shard_colocation_1b | 57637 + single_shard_colocation_1c | 57637 + single_shard_colocation_2a | 57637 + single_shard_colocation_2b | 57637 +(5 rows) + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +select rebalance_table_shards(); +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... 
+ rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- verify some shards are moved to the new node +SELECT sh.logicalrelid, pl.nodeport + FROM pg_dist_shard sh JOIN pg_dist_shard_placement pl ON sh.shardid = pl.shardid + WHERE sh.logicalrelid::text IN ('single_shard_colocation_1a', 'single_shard_colocation_1b', 'single_shard_colocation_1c', 'single_shard_colocation_2a', 'single_shard_colocation_2b') + ORDER BY sh.logicalrelid; + logicalrelid | nodeport +--------------------------------------------------------------------- + single_shard_colocation_1a | 57638 + single_shard_colocation_1b | 57638 + single_shard_colocation_1c | 57638 + single_shard_colocation_2a | 57637 + single_shard_colocation_2b | 57637 +(5 rows) + +DROP TABLE single_shard_colocation_1a, single_shard_colocation_1b, single_shard_colocation_1c, single_shard_colocation_2a, single_shard_colocation_2b CASCADE; +-- verify we detect if one of the tables do not have a replica identity or primary key +-- and error out in case of shard transfer mode = auto +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +create table table_with_primary_key (a int primary key); +select create_distributed_table('table_with_primary_key','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +create table table_without_primary_key (a bigint); +select create_distributed_table('table_without_primary_key','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +select rebalance_table_shards(); +ERROR: cannot use logical replication to transfer shards of the relation table_without_primary_key since it doesn't have a REPLICA IDENTITY or PRIMARY KEY +DROP TABLE table_with_primary_key, table_without_primary_key; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO OFF; REVOKE ALL ON SCHEMA public FROM testrole; diff --git a/src/test/regress/expected/shard_rebalancer_unit.out b/src/test/regress/expected/shard_rebalancer_unit.out index 156edfc6b..9ebd6f942 100644 --- a/src/test/regress/expected/shard_rebalancer_unit.out +++ b/src/test/regress/expected/shard_rebalancer_unit.out @@ -742,3 +742,75 @@ HINT: If you do want these moves to happen, try changing improvement_threshold {"updatetype":1,"shardid":2,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} (2 rows) +-- Test single shard colocation groups +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":10, "nodename":"a", "next_colocation": true}', + '{"shardid":3, "cost":10, "nodename":"a", "next_colocation": true}', + '{"shardid":4, "cost":100, "nodename":"a", "next_colocation": true}', + '{"shardid":5, "cost":50, "nodename":"a", "next_colocation": true}', + '{"shardid":6, "cost":50, "nodename":"a", "next_colocation": true}' + ]::json[], + improvement_threshold := 0.1 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":4,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} +(2 rows) + +-- Test colocation groups with shard count < worker count +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":10, "nodename":"a"}', + '{"shardid":3, "cost":10, "nodename":"a", "next_colocation": true}', + '{"shardid":4, "cost":100, "nodename":"a"}', + '{"shardid":5, "cost":50, "nodename":"a", "next_colocation": true}', + '{"shardid":6, "cost":50, "nodename":"a"}' + ]::json[], + improvement_threshold := 0.1 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":4,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} + {"updatetype":1,"shardid":5,"sourcename":"a","sourceport":5432,"targetname":"c","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"a","sourceport":5432,"targetname":"c","targetport":5432} +(3 rows) + +-- Test colocation groups with shard count < worker count +-- mixed with a colocation group shard_count > worker count +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":10, "nodename":"a"}', + '{"shardid":3, "cost":10, "nodename":"a", "next_colocation": true}', + '{"shardid":4, "cost":100, "nodename":"a"}', + '{"shardid":5, "cost":50, "nodename":"a", "next_colocation": true}', + '{"shardid":6, "cost":50, "nodename":"a"}', + '{"shardid":7, "cost":50, "nodename":"b", "next_colocation": true}', + '{"shardid":8, "cost":50, "nodename":"b"}', + '{"shardid":9, "cost":50, "nodename":"b"}', + 
'{"shardid":10, "cost":50, "nodename":"b"}', + '{"shardid":11, "cost":50, "nodename":"b"}', + '{"shardid":12, "cost":50, "nodename":"b"}' + ]::json[], + improvement_threshold := 0.1 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":7,"sourcename":"b","sourceport":5432,"targetname":"a","targetport":5432} + {"updatetype":1,"shardid":8,"sourcename":"b","sourceport":5432,"targetname":"c","targetport":5432} + {"updatetype":1,"shardid":9,"sourcename":"b","sourceport":5432,"targetname":"a","targetport":5432} + {"updatetype":1,"shardid":10,"sourcename":"b","sourceport":5432,"targetname":"c","targetport":5432} + {"updatetype":1,"shardid":4,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} + {"updatetype":1,"shardid":5,"sourcename":"a","sourceport":5432,"targetname":"c","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"a","sourceport":5432,"targetname":"c","targetport":5432} +(7 rows) + diff --git a/src/test/regress/expected/single_node.out b/src/test/regress/expected/single_node.out index 52d087b18..7f152decd 100644 --- a/src/test/regress/expected/single_node.out +++ b/src/test/regress/expected/single_node.out @@ -1829,6 +1829,7 @@ SELECT pg_sleep(0.1); -- backend(s) that execute on the shards will be terminated -- so show that there no internal backends SET search_path TO single_node; +SET citus.next_shard_id TO 90730500; SELECT count(*) from should_commit; count --------------------------------------------------------------------- @@ -1882,6 +1883,7 @@ BEGIN; ROLLBACK; \c - - - :master_port SET search_path TO single_node; +SET citus.next_shard_id TO 90830500; -- simulate that even if there is no connection slots -- to connect, Citus can switch to local execution SET citus.force_max_query_parallelization TO false; @@ -2106,10 +2108,10 @@ NOTICE: executing the command locally: SELECT count(DISTINCT (key)::text) AS co SET citus.shard_replication_factor TO 1; CREATE TABLE test_disabling_drop_and_truncate (a int); SELECT create_distributed_table('test_disabling_drop_and_truncate', 'a'); -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102040, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102040, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102041, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102041, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102042, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102042, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102043, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102043, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830500, 'single_node', 'CREATE TABLE 
single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830500, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830501, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830501, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830502, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830502, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830503, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830503, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') create_distributed_table --------------------------------------------------------------------- @@ -2117,24 +2119,24 @@ NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (1 SET citus.enable_manual_changes_to_shards TO off; -- these should error out -DROP TABLE test_disabling_drop_and_truncate_102040; -ERROR: cannot modify "test_disabling_drop_and_truncate_102040" because it is a shard of a distributed table +DROP TABLE test_disabling_drop_and_truncate_90830500; +ERROR: cannot modify "test_disabling_drop_and_truncate_90830500" because it is a shard of a distributed table HINT: Use the distributed table or set citus.enable_manual_changes_to_shards to on to modify shards directly -TRUNCATE TABLE test_disabling_drop_and_truncate_102040; -ERROR: cannot modify "test_disabling_drop_and_truncate_102040" because it is a shard of a distributed table +TRUNCATE TABLE test_disabling_drop_and_truncate_90830500; +ERROR: cannot modify "test_disabling_drop_and_truncate_90830500" because it is a shard of a distributed table HINT: Use the distributed table or set citus.enable_manual_changes_to_shards to on to modify shards directly RESET citus.enable_manual_changes_to_shards ; -- these should work as expected -TRUNCATE TABLE test_disabling_drop_and_truncate_102040; -DROP TABLE test_disabling_drop_and_truncate_102040; +TRUNCATE TABLE test_disabling_drop_and_truncate_90830500; +DROP TABLE test_disabling_drop_and_truncate_90830500; DROP TABLE test_disabling_drop_and_truncate; -- test creating distributed or reference tables from shards CREATE TABLE test_creating_distributed_relation_table_from_shard (a int); SELECT create_distributed_table('test_creating_distributed_relation_table_from_shard', 'a'); -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102044, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102044, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102045, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102045, 
'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102046, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102046, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102047, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102047, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830504, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830504, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830505, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830505, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830506, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830506, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830507, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830507, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') create_distributed_table --------------------------------------------------------------------- @@ -2142,11 +2144,11 @@ NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (1 -- these should error because shards cannot be used to: -- create distributed table -SELECT create_distributed_table('test_creating_distributed_relation_table_from_shard_102044', 'a'); -ERROR: relation "test_creating_distributed_relation_table_from_shard_102044" is a shard relation +SELECT create_distributed_table('test_creating_distributed_relation_table_from_shard_90830504', 'a'); +ERROR: relation "test_creating_distributed_relation_table_from_shard_90830504" is a shard relation -- create reference table -SELECT create_reference_table('test_creating_distributed_relation_table_from_shard_102044'); -ERROR: relation "test_creating_distributed_relation_table_from_shard_102044" is a shard relation +SELECT create_reference_table('test_creating_distributed_relation_table_from_shard_90830504'); +ERROR: relation "test_creating_distributed_relation_table_from_shard_90830504" is a shard relation RESET citus.shard_replication_factor; DROP TABLE test_creating_distributed_relation_table_from_shard; -- lets flush the copy often to make sure 
everyhing is fine diff --git a/src/test/regress/expected/single_node_0.out b/src/test/regress/expected/single_node_0.out index 247b8839d..a21cdd28a 100644 --- a/src/test/regress/expected/single_node_0.out +++ b/src/test/regress/expected/single_node_0.out @@ -1829,6 +1829,7 @@ SELECT pg_sleep(0.1); -- backend(s) that execute on the shards will be terminated -- so show that there no internal backends SET search_path TO single_node; +SET citus.next_shard_id TO 90730500; SELECT count(*) from should_commit; count --------------------------------------------------------------------- @@ -1882,6 +1883,7 @@ BEGIN; ROLLBACK; \c - - - :master_port SET search_path TO single_node; +SET citus.next_shard_id TO 90830500; -- simulate that even if there is no connection slots -- to connect, Citus can switch to local execution SET citus.force_max_query_parallelization TO false; @@ -2106,10 +2108,10 @@ NOTICE: executing the command locally: SELECT count(DISTINCT (key)::text) AS co SET citus.shard_replication_factor TO 1; CREATE TABLE test_disabling_drop_and_truncate (a int); SELECT create_distributed_table('test_disabling_drop_and_truncate', 'a'); -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102040, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102040, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102041, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102041, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102042, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102042, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102043, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102043, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830500, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830500, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830501, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830501, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830502, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830502, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830503, 'single_node', 'CREATE 
TABLE single_node.test_disabling_drop_and_truncate (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830503, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') create_distributed_table --------------------------------------------------------------------- @@ -2117,24 +2119,24 @@ NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (1 SET citus.enable_manual_changes_to_shards TO off; -- these should error out -DROP TABLE test_disabling_drop_and_truncate_102040; -ERROR: cannot modify "test_disabling_drop_and_truncate_102040" because it is a shard of a distributed table +DROP TABLE test_disabling_drop_and_truncate_90830500; +ERROR: cannot modify "test_disabling_drop_and_truncate_90830500" because it is a shard of a distributed table HINT: Use the distributed table or set citus.enable_manual_changes_to_shards to on to modify shards directly -TRUNCATE TABLE test_disabling_drop_and_truncate_102040; -ERROR: cannot modify "test_disabling_drop_and_truncate_102040" because it is a shard of a distributed table +TRUNCATE TABLE test_disabling_drop_and_truncate_90830500; +ERROR: cannot modify "test_disabling_drop_and_truncate_90830500" because it is a shard of a distributed table HINT: Use the distributed table or set citus.enable_manual_changes_to_shards to on to modify shards directly RESET citus.enable_manual_changes_to_shards ; -- these should work as expected -TRUNCATE TABLE test_disabling_drop_and_truncate_102040; -DROP TABLE test_disabling_drop_and_truncate_102040; +TRUNCATE TABLE test_disabling_drop_and_truncate_90830500; +DROP TABLE test_disabling_drop_and_truncate_90830500; DROP TABLE test_disabling_drop_and_truncate; -- test creating distributed or reference tables from shards CREATE TABLE test_creating_distributed_relation_table_from_shard (a int); SELECT create_distributed_table('test_creating_distributed_relation_table_from_shard', 'a'); -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102044, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102044, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102045, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102045, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102046, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102046, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102047, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (102047, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830504, 'single_node', 'CREATE TABLE 
single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830504, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830505, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830505, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830506, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830506, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90830507, 'single_node', 'CREATE TABLE single_node.test_creating_distributed_relation_table_from_shard (a integer) USING heap');SELECT worker_apply_shard_ddl_command (90830507, 'single_node', 'ALTER TABLE single_node.test_creating_distributed_relation_table_from_shard OWNER TO postgres') create_distributed_table --------------------------------------------------------------------- @@ -2142,11 +2144,11 @@ NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (1 -- these should error because shards cannot be used to: -- create distributed table -SELECT create_distributed_table('test_creating_distributed_relation_table_from_shard_102044', 'a'); -ERROR: relation "test_creating_distributed_relation_table_from_shard_102044" is a shard relation +SELECT create_distributed_table('test_creating_distributed_relation_table_from_shard_90830504', 'a'); +ERROR: relation "test_creating_distributed_relation_table_from_shard_90830504" is a shard relation -- create reference table -SELECT create_reference_table('test_creating_distributed_relation_table_from_shard_102044'); -ERROR: relation "test_creating_distributed_relation_table_from_shard_102044" is a shard relation +SELECT create_reference_table('test_creating_distributed_relation_table_from_shard_90830504'); +ERROR: relation "test_creating_distributed_relation_table_from_shard_90830504" is a shard relation RESET citus.shard_replication_factor; DROP TABLE test_creating_distributed_relation_table_from_shard; -- lets flush the copy often to make sure everyhing is fine diff --git a/src/test/regress/expected/single_node_truncate.out b/src/test/regress/expected/single_node_truncate.out index 7d94dc744..18883a4df 100644 --- a/src/test/regress/expected/single_node_truncate.out +++ b/src/test/regress/expected/single_node_truncate.out @@ -1,6 +1,7 @@ CREATE SCHEMA single_node_truncate; SET search_path TO single_node_truncate; SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 91630500; -- helper view that prints out local table names and sizes in the schema CREATE VIEW table_sizes AS SELECT @@ -28,12 +29,12 @@ CREATE TABLE citus_local(id int, ref_id int REFERENCES ref(id)); INSERT INTO citus_local SELECT x,x FROM generate_series(1,10000) x; -- verify that shell tables for citus local tables are empty SELECT * FROM table_sizes; - name | has_data + name | has_data --------------------------------------------------------------------- - citus_local | f - citus_local_102049 | t - ref 
| t - ref_102048 | t + citus_local | f + citus_local_91630501 | t + ref | t + ref_91630500 | t (4 rows) -- verify that this UDF is noop on Citus local tables @@ -44,12 +45,12 @@ SELECT truncate_local_data_after_distributing_table('citus_local'); (1 row) SELECT * FROM table_sizes; - name | has_data + name | has_data --------------------------------------------------------------------- - citus_local | f - citus_local_102049 | t - ref | t - ref_102048 | t + citus_local | f + citus_local_91630501 | t + ref | t + ref_91630500 | t (4 rows) -- test that we allow cascading truncates to citus local tables @@ -62,12 +63,12 @@ NOTICE: truncate cascades to table "citus_local" (1 row) SELECT * FROM table_sizes; - name | has_data + name | has_data --------------------------------------------------------------------- - citus_local | f - citus_local_102049 | t - ref | f - ref_102048 | t + citus_local | f + citus_local_91630501 | t + ref | f + ref_91630500 | t (4 rows) ROLLBACK; @@ -95,17 +96,17 @@ NOTICE: truncate cascades to table "dist" (1 row) SELECT * FROM table_sizes; - name | has_data + name | has_data --------------------------------------------------------------------- - citus_local | f - citus_local_102049 | t - dist | f - dist_102051 | t - dist_102052 | t - dist_102053 | t - dist_102054 | t - ref | f - ref_102048 | t + citus_local | f + citus_local_91630501 | t + dist | f + dist_91630503 | t + dist_91630504 | t + dist_91630505 | t + dist_91630506 | t + ref | f + ref_91630500 | t (9 rows) ROLLBACK; @@ -118,17 +119,17 @@ SELECT truncate_local_data_after_distributing_table('dist'); (1 row) SELECT * FROM table_sizes; - name | has_data + name | has_data --------------------------------------------------------------------- - citus_local | f - citus_local_102049 | t - dist | f - dist_102051 | t - dist_102052 | t - dist_102053 | t - dist_102054 | t - ref | t - ref_102048 | t + citus_local | f + citus_local_91630501 | t + dist | f + dist_91630503 | t + dist_91630504 | t + dist_91630505 | t + dist_91630506 | t + ref | t + ref_91630500 | t (9 rows) ROLLBACK; diff --git a/src/test/regress/expected/split_shard.out b/src/test/regress/expected/split_shard.out index 069ff306f..7186b27d2 100644 --- a/src/test/regress/expected/split_shard.out +++ b/src/test/regress/expected/split_shard.out @@ -61,7 +61,9 @@ SET search_path TO split_shard_replication_setup_schema; CREATE TABLE table_to_split_2(id bigserial PRIMARY KEY, value char); CREATE TABLE table_to_split_3(id bigserial PRIMARY KEY, value char); -- Create publication at worker1 +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION pub1 FOR TABLE table_to_split_1, table_to_split_2, table_to_split_3; +RESET citus.enable_ddl_propagation; SELECT count(*) FROM pg_catalog.worker_split_shard_replication_setup(ARRAY[ ROW(1, 'id', 2, '-2147483648', '-1', :worker_2_node)::pg_catalog.split_shard_info, ROW(1, 'id', 3, '0', '2147483647', :worker_2_node)::pg_catalog.split_shard_info @@ -261,7 +263,9 @@ SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \ \c - - - :worker_1_port SET search_path TO split_shard_replication_setup_schema; -- Create publication at worker1 +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION pub1 FOR TABLE table_to_split_1, table_to_split_2, table_to_split_3; +RESET citus.enable_ddl_propagation; SELECT count(*) FROM pg_catalog.worker_split_shard_replication_setup(ARRAY[ ROW(1, 'id', 2, '-2147483648', '-1', :worker_1_node)::pg_catalog.split_shard_info, ROW(1, 'id', 3, '0', '2147483647', 
:worker_2_node)::pg_catalog.split_shard_info @@ -428,7 +432,9 @@ SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \ SET search_path TO split_shard_replication_setup_schema; SET client_min_messages TO ERROR; -- Create publication at worker1 +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION pub1 for table table_to_split_1, table_to_split_2, table_to_split_3; +RESET citus.enable_ddl_propagation; -- Worker1 is target for table_to_split_2 and table_to_split_3 SELECT count(*) FROM pg_catalog.worker_split_shard_replication_setup(ARRAY[ ROW(1, 'id', 2, '-2147483648', '-1', :worker_1_node)::pg_catalog.split_shard_info, @@ -597,8 +603,10 @@ CREATE TABLE table_second_9(id bigserial PRIMARY KEY, value char); -- Create publication at worker1 \c - postgres - :worker_1_port SET search_path TO split_shard_replication_setup_schema; +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION pub1 FOR TABLE table_first_4, table_first_5, table_first_6; CREATE PUBLICATION pub2 FOR TABLE table_second_7, table_second_8, table_second_9; +RESET citus.enable_ddl_propagation; SELECT count(*) FROM pg_catalog.worker_split_shard_replication_setup(ARRAY[ ROW(4, 'id', 5, '-2147483648', '-1', :worker_2_node)::pg_catalog.split_shard_info, ROW(4, 'id', 6, '0', '2147483647', :worker_2_node)::pg_catalog.split_shard_info, diff --git a/src/test/regress/expected/upgrade_citus_finish_citus_upgrade.out b/src/test/regress/expected/upgrade_citus_finish_citus_upgrade.out index 8c46aae43..bb80d9103 100644 --- a/src/test/regress/expected/upgrade_citus_finish_citus_upgrade.out +++ b/src/test/regress/expected/upgrade_citus_finish_citus_upgrade.out @@ -21,3 +21,12 @@ FROM pg_dist_node_metadata, pg_extension WHERE extname = 'citus'; -- still, do not NOTICE the version as it changes per release SET client_min_messages TO WARNING; CALL citus_finish_citus_upgrade(); +-- we should be able to sync metadata in nontransactional way as well +SET citus.metadata_sync_mode TO 'nontransactional'; +SELECT start_metadata_sync_to_all_nodes(); + start_metadata_sync_to_all_nodes +--------------------------------------------------------------------- + t +(1 row) + +RESET citus.metadata_sync_mode; diff --git a/src/test/regress/expected/upgrade_columnar_after.out b/src/test/regress/expected/upgrade_columnar_after.out index 0da9bb17f..768a057f9 100644 --- a/src/test/regress/expected/upgrade_columnar_after.out +++ b/src/test/regress/expected/upgrade_columnar_after.out @@ -228,10 +228,12 @@ BEGIN; 22 (1 row) - -- make sure that serial is preserved - -- since we run "after schedule" twice and "rollback" wouldn't undo - -- sequence changes, it can be 22 or 33, not a different value - SELECT max(id) in (22, 33) FROM text_data; + -- Make sure that serial is preserved. + -- + -- Since we might run "after schedule" several times for flaky test + -- detection and "rollback" wouldn't undo sequence changes, "id" should + -- look like below: + SELECT max(id) >= 11 AND max(id) % 11 = 0 FROM text_data; ?column? 
--------------------------------------------------------------------- t @@ -265,7 +267,12 @@ ROLLBACK; SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -283,8 +290,8 @@ UNION (0 rows) -- ... , and both columnar_schema_members_pg_depend & columnar_schema_members --- should have 10 entries. -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +-- should have 5 entries. +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; ?column? --------------------------------------------------------------------- t @@ -292,12 +299,17 @@ SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; -- Check the same for workers too. -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ SELECT pg_class.oid INTO columnar_schema_members FROM pg_class, pg_namespace WHERE pg_namespace.oid=pg_class.relnamespace AND - pg_namespace.nspname='columnar_internal'; + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); SELECT refobjid INTO columnar_schema_members_pg_depend FROM pg_depend WHERE classid = 'pg_am'::regclass::oid AND @@ -308,44 +320,44 @@ WHERE classid = 'pg_am'::regclass::oid AND deptype = 'n'; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"SELECT 10") - (localhost,10202,t,"SELECT 10") + t | SELECT 5 + t | SELECT 5 (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ (TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) UNION (TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"") - (localhost,10202,t,"") + t | + t | (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ -SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend; +SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,t) - (localhost,10202,t,t) + t | t + t | t (2 rows) -SELECT run_command_on_workers( +SELECT success, result FROM run_command_on_workers( $$ DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; $$ ); - run_command_on_workers + success | result --------------------------------------------------------------------- - (localhost,10201,t,"DROP TABLE") - (localhost,10202,t,"DROP TABLE") + t | DROP TABLE + t | DROP TABLE (2 rows) diff --git a/src/test/regress/expected/upgrade_columnar_before.out b/src/test/regress/expected/upgrade_columnar_before.out index 28c252e30..a4895c770 100644 --- a/src/test/regress/expected/upgrade_columnar_before.out +++ b/src/test/regress/expected/upgrade_columnar_before.out @@ -1,5 +1,27 @@ -- Test if relying on topological sort of the 
objects, not their names, works -- fine when re-creating objects during pg_upgrade. +DO +$$ +BEGIN +IF EXISTS (SELECT * FROM pg_namespace WHERE nspname = 'upgrade_columnar') +THEN + -- Drop the the table leftover from the earlier run of + -- upgrade_columnar_before.sql. Similarly, drop the fake public schema + -- created before and rename the original one (renamed to citus_schema) + -- back to public. + -- + -- This can only happen if upgrade_columnar_before.sql is run multiple + -- times for flaky test detection. + DROP TABLE citus_schema.new_columnar_table; + DROP SCHEMA public CASCADE; + ALTER SCHEMA citus_schema RENAME TO public; + + SET LOCAL client_min_messages TO WARNING; + DROP SCHEMA upgrade_columnar CASCADE; +END IF; +END +$$ +LANGUAGE plpgsql; ALTER SCHEMA public RENAME TO citus_schema; SET search_path TO citus_schema; -- As mentioned in https://github.com/citusdata/citus/issues/5447, it diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 7cd2f63c8..a234c4bac 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -74,7 +74,11 @@ ORDER BY 1; function citus_internal_delete_partition_metadata(regclass) function citus_internal_delete_shard_metadata(bigint) function citus_internal_global_blocked_processes() + function citus_internal_is_replication_origin_tracking_active() function citus_internal_local_blocked_processes() + function citus_internal_mark_node_not_synced(integer,integer) + function citus_internal_start_replication_origin_tracking() + function citus_internal_stop_replication_origin_tracking() function citus_internal_update_placement_metadata(bigint,integer,integer) function citus_internal_update_relation_colocation(oid,integer) function citus_is_clock_after(cluster_clock,cluster_clock) @@ -230,6 +234,7 @@ ORDER BY 1; function truncate_local_data_after_distributing_table(regclass) function undistribute_table(regclass,boolean) function update_distributed_table_colocation(regclass,text) + function worker_adjust_identity_column_seq_ranges(regclass) function worker_apply_inter_shard_ddl_command(bigint,text,bigint,text,text) function worker_apply_sequence_command(text) function worker_apply_sequence_command(text,regtype) @@ -241,6 +246,7 @@ ORDER BY 1; function worker_create_or_replace_object(text) function worker_create_or_replace_object(text[]) function worker_create_truncate_trigger(regclass) + function worker_drop_all_shell_tables(boolean) function worker_drop_distributed_table(text) function worker_drop_sequence_dependency(text) function worker_drop_shell_table(text) @@ -318,5 +324,5 @@ ORDER BY 1; view citus_stat_statements view pg_dist_shard_placement view time_partitions -(310 rows) +(316 rows) diff --git a/src/test/regress/expected/upgrade_post_11_after.out b/src/test/regress/expected/upgrade_post_11_after.out index d7d7c46b0..cf41da8e1 100644 --- a/src/test/regress/expected/upgrade_post_11_after.out +++ b/src/test/regress/expected/upgrade_post_11_after.out @@ -25,11 +25,11 @@ SELECT pg_identify_object_as_address(classid, objid, objsubid) FROM pg_catalog.p (19 rows) -- on all nodes -SELECT run_command_on_workers($$SELECT array_agg(pg_identify_object_as_address(classid, objid, objsubid)) FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 
'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass) ORDER BY 1;$$) ORDER BY 1; +SELECT run_command_on_workers($$SELECT array_agg(worker_object) FROM (SELECT pg_identify_object_as_address(classid, objid, objsubid) worker_object FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass) ORDER BY 1) worker_objects;$$) ORDER BY 1; run_command_on_workers --------------------------------------------------------------------- - (localhost,57636,t,"{""(type,{post_11_upgrade.my_type},{})"",""(function,\\""{post_11_upgrade,func_in_transaction_def}\\"",{})"",""(table,\\""{post_11_upgrade,part_table}\\"",{})"",""(table,\\""{post_11_upgrade,sensors}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test_my_type}\\"",{})"",""(view,\\""{post_11_upgrade,non_dist_upgrade_ref_view_2}\\"",{})"",""(view,\\""{post_11_upgrade,reporting_line}\\"",{})"",""(schema,{post_11_upgrade},{})"",""(\\""text search configuration\\"",\\""{post_11_upgrade,partial_index_test_config}\\"",{})""}") - (localhost,57637,t,"{""(type,{post_11_upgrade.my_type},{})"",""(function,\\""{post_11_upgrade,func_in_transaction_def}\\"",{})"",""(table,\\""{post_11_upgrade,part_table}\\"",{})"",""(table,\\""{post_11_upgrade,sensors}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test_my_type}\\"",{})"",""(view,\\""{post_11_upgrade,non_dist_upgrade_ref_view_2}\\"",{})"",""(view,\\""{post_11_upgrade,reporting_line}\\"",{})"",""(schema,{post_11_upgrade},{})"",""(\\""text search configuration\\"",\\""{post_11_upgrade,partial_index_test_config}\\"",{})""}") + (localhost,57636,t,"{""(function,\\""{post_11_upgrade,func_in_transaction_def}\\"",{})"",""(schema,{post_11_upgrade},{})"",""(table,\\""{post_11_upgrade,part_table}\\"",{})"",""(table,\\""{post_11_upgrade,sensors}\\"",{})"",""(\\""text search configuration\\"",\\""{post_11_upgrade,partial_index_test_config}\\"",{})"",""(type,{post_11_upgrade.my_type},{})"",""(view,\\""{post_11_upgrade,non_dist_upgrade_ref_view_2}\\"",{})"",""(view,\\""{post_11_upgrade,reporting_line}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test_my_type}\\"",{})""}") + (localhost,57637,t,"{""(function,\\""{post_11_upgrade,func_in_transaction_def}\\"",{})"",""(schema,{post_11_upgrade},{})"",""(table,\\""{post_11_upgrade,part_table}\\"",{})"",""(table,\\""{post_11_upgrade,sensors}\\"",{})"",""(\\""text search 
configuration\\"",\\""{post_11_upgrade,partial_index_test_config}\\"",{})"",""(type,{post_11_upgrade.my_type},{})"",""(view,\\""{post_11_upgrade,non_dist_upgrade_ref_view_2}\\"",{})"",""(view,\\""{post_11_upgrade,reporting_line}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test_my_type}\\"",{})""}") (2 rows) -- Create the necessary test utility function diff --git a/src/test/regress/expected/worker_split_copy_test.out b/src/test/regress/expected/worker_split_copy_test.out index 67d515198..f4fae57e0 100644 --- a/src/test/regress/expected/worker_split_copy_test.out +++ b/src/test/regress/expected/worker_split_copy_test.out @@ -142,8 +142,90 @@ SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_810700 (1 row) -- END: List updated row count for local targets shard. +-- Check that GENERATED columns are handled properly in a shard split operation. +\c - - - :master_port +SET search_path TO worker_split_copy_test; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 81080000; +-- BEGIN: Create distributed table and insert data. +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char, col_todrop int); +SELECT create_distributed_table('dist_table_with_generated_col', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Check that dropped columns are filtered out in COPY command. +ALTER TABLE dist_table_with_generated_col DROP COLUMN col_todrop; +INSERT INTO dist_table_with_generated_col (id, value) (SELECT g.id, 'N' FROM generate_series(1, 1000) AS g(id)); +-- END: Create distributed table and insert data. +-- BEGIN: Create target shards in Worker1 and Worker2 for a 2-way split copy. +\c - - - :worker_1_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080015(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); +\c - - - :worker_2_port +CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080016(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char); +-- BEGIN: List row count for source shard and targets shard in Worker1. +\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080000; + count +--------------------------------------------------------------------- + 510 +(1 row) + +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- BEGIN: List row count for target shard in Worker2. 
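-- (Editor's aside before the Worker2 row count check below; not part of the patch.) The point
-- of dist_table_with_generated_col is that plain PostgreSQL refuses explicit writes to
-- GENERATED ... STORED columns, and COPY enforces the same rule, which is presumably why the
-- split copy has to build an explicit column list that skips generated (and dropped) columns.
-- A minimal standalone illustration, using a hypothetical table name:
CREATE TABLE generated_col_demo(id int, id_plus_3 int GENERATED ALWAYS AS (id + 3) STORED);
INSERT INTO generated_col_demo (id, id_plus_3) VALUES (1, 4);  -- rejected: id_plus_3 is generated
INSERT INTO generated_col_demo (id) VALUES (1);                -- fine: id_plus_3 is computed as 4
DROP TABLE generated_col_demo;
-- (End of aside; the test continues with the Worker2 shard below.)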
+\c - - - :worker_2_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016; + count +--------------------------------------------------------------------- + 0 +(1 row) + +\c - - - :worker_1_port +SELECT * from worker_split_copy( + 81080000, -- source shard id to copy + 'id', + ARRAY[ + -- split copy info for split children 1 + ROW(81080015, -- destination shard id + -2147483648, -- split range begin + -1073741824, --split range end + :worker_1_node)::pg_catalog.split_copy_info, + -- split copy info for split children 2 + ROW(81080016, --destination shard id + -1073741823, --split range begin + -1, --split range end + :worker_2_node)::pg_catalog.split_copy_info + ] + ); + worker_split_copy +--------------------------------------------------------------------- + +(1 row) + +\c - - - :worker_1_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015; + count +--------------------------------------------------------------------- + 247 +(1 row) + +\c - - - :worker_2_port +SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016; + count +--------------------------------------------------------------------- + 263 +(1 row) + -- BEGIN: CLEANUP. \c - - - :master_port SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); DROP SCHEMA worker_split_copy_test CASCADE; -- END: CLEANUP. diff --git a/src/test/regress/failure_schedule b/src/test/regress/failure_schedule index 816f9d9e2..afc4780bf 100644 --- a/src/test/regress/failure_schedule +++ b/src/test/regress/failure_schedule @@ -32,6 +32,7 @@ test: failure_single_mod test: failure_savepoints test: failure_multi_row_insert test: failure_mx_metadata_sync +test: failure_mx_metadata_sync_multi_trans test: failure_connection_establishment # this test syncs metadata to the workers diff --git a/src/test/regress/isolation_schedule b/src/test/regress/isolation_schedule index 90d1463ad..ca893ba8d 100644 --- a/src/test/regress/isolation_schedule +++ b/src/test/regress/isolation_schedule @@ -107,4 +107,10 @@ test: isolation_multiuser_locking test: isolation_acquire_distributed_locks test: isolation_concurrent_move_create_table +# MERGE +test: isolation_merge +test: isolation_merge_replicated + + +# Note: Always keep this test at the end test: isolation_check_mx diff --git a/src/test/regress/minimal_schedule b/src/test/regress/minimal_schedule index ef2d3dc65..8b0cfff70 100644 --- a/src/test/regress/minimal_schedule +++ b/src/test/regress/minimal_schedule @@ -1,2 +1,2 @@ test: minimal_cluster_management -test: multi_test_helpers multi_test_helpers_superuser columnar_test_helpers multi_test_catalog_views tablespace +test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw columnar_test_helpers multi_test_catalog_views tablespace diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index 5e2cd17c1..ee81bde38 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -19,7 +19,7 @@ test: multi_extension test: single_node test: relation_access_tracking_single_node test: single_node_truncate -test: multi_test_helpers multi_test_helpers_superuser +test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw test: multi_cluster_management # below tests are placed right after multi_cluster_management as we do @@ -91,13 +91,6 @@ test: drop_partitioned_table test: multi_fix_partition_shard_index_names test: partition_wise_join -# ---------- -# Tests for foreign data wrapper 
support -# ---------- -test: multi_create_fdw - - - # ---------- # Tests for statistics propagation # ---------- diff --git a/src/test/regress/multi_mx_schedule b/src/test/regress/multi_mx_schedule index 9465f4c28..181f9288f 100644 --- a/src/test/regress/multi_mx_schedule +++ b/src/test/regress/multi_mx_schedule @@ -43,7 +43,8 @@ test: coordinator_evaluation_modify test: coordinator_evaluation_select test: multi_mx_call test: multi_mx_function_call_delegation -test: multi_mx_modifications local_shard_execution_replicated +test: multi_mx_modifications +test: local_shard_execution_replicated # the following test has to be run sequentially test: local_shard_execution test: multi_mx_repartition_udt_w1 multi_mx_repartition_udt_w2 diff --git a/src/test/regress/operations_schedule b/src/test/regress/operations_schedule index 15afd9e18..f5e77c835 100644 --- a/src/test/regress/operations_schedule +++ b/src/test/regress/operations_schedule @@ -5,6 +5,7 @@ test: shard_rebalancer_unit test: shard_rebalancer test: background_rebalance test: worker_copy_table_to_node +test: background_rebalance_parallel test: foreign_key_to_reference_shard_rebalance test: multi_move_mx test: shard_move_deferred_delete diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index 119e6a758..544cd6ba1 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -49,6 +49,7 @@ sub Usage() print " --pg_ctl-timeout Timeout for pg_ctl\n"; print " --connection-timeout Timeout for connecting to worker nodes\n"; print " --mitmproxy Start a mitmproxy for one of the workers\n"; + print " --worker-count Number of workers in Citus cluster (default: 2)\n"; exit 1; } @@ -84,10 +85,12 @@ my $mitmFifoPath = catfile($TMP_CHECKDIR, "mitmproxy.fifo"); my $conninfo = ""; my $publicWorker1Host = "localhost"; my $publicWorker2Host = "localhost"; +my $workerCount = 2; my $serversAreShutdown = "TRUE"; my $usingWindows = 0; my $mitmPid = 0; +my $workerCount = 2; if ($Config{osname} eq "MSWin32") { @@ -116,6 +119,7 @@ GetOptions( 'conninfo=s' => \$conninfo, 'worker-1-public-hostname=s' => \$publicWorker1Host, 'worker-2-public-hostname=s' => \$publicWorker2Host, + 'worker-count=i' => \$workerCount, 'help' => sub { Usage() }); my $fixopen = "$bindir/postgres.fixopen"; @@ -318,7 +322,6 @@ my $mitmPort = 9060; # Set some default configuration options my $masterPort = 57636; -my $workerCount = 2; my @workerHosts = (); my @workerPorts = (); @@ -485,6 +488,7 @@ push(@pgOptions, "citus.explain_analyze_sort_method='taskId'"); push(@pgOptions, "citus.enable_manual_changes_to_shards=on"); push(@pgOptions, "citus.allow_unsafe_locks_from_workers=on"); push(@pgOptions, "citus.stat_statements_track = 'all'"); +push(@pgOptions, "citus.enable_change_data_capture=on"); # Some tests look at shards in pg_class, make sure we can usually see them: push(@pgOptions, "citus.show_shards_for_app_name_prefixes='pg_regress'"); diff --git a/src/test/regress/spec/isolation_merge.spec b/src/test/regress/spec/isolation_merge.spec new file mode 100644 index 000000000..042ce9155 --- /dev/null +++ b/src/test/regress/spec/isolation_merge.spec @@ -0,0 +1,92 @@ +// +// How we organize this isolation test spec, is explained at README.md file in this directory. 
+// + +// create distributed tables to test behavior of MERGE in concurrent operations +setup +{ + DO + $do$ + DECLARE ver int; + BEGIN + SELECT substring(version(), '\d+')::int into ver; + IF (ver < 15) + THEN + RAISE EXCEPTION 'MERGE is not supported on PG versions below 15'; + END IF; + END + $do$; + + SET citus.shard_replication_factor TO 1; + SELECT 1 FROM master_add_node('localhost', 57637); + SELECT 1 FROM master_add_node('localhost', 57638); + + CREATE TABLE prept(t1 int, t2 int); + CREATE TABLE preps(s1 int, s2 int); + SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + INSERT INTO prept VALUES(100, 0); + INSERT INTO preps VALUES(100, 0); + INSERT INTO preps VALUES(200, 0); +} + +// drop distributed tables +teardown +{ + DROP TABLE IF EXISTS prept CASCADE; + DROP TABLE IF EXISTS preps CASCADE; +} + +// session 1 +session "s1" + +step "s1-begin" { BEGIN; } + +step "s1-upd-ins" { MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + } + +step "s1-del-ins" { MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN DELETE + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + } + +step "s1-del" { MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN DELETE; + } + +step "s1-ins" { MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + } + +step "s1-commit" { COMMIT; } +step "s1-result" { SELECT * FROM prept ORDER BY 1; } + +// session 2 +session "s2" + +step "s2-begin" { BEGIN; } + +step "s2-upd-del" { MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED AND prept.t2 = 0 THEN DELETE + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1; + } + +step "s2-upd" { MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1; + } + +step "s2-ins" { MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); + } + +step "s2-commit" { COMMIT; } +step "s2-result" { SELECT * FROM prept ORDER BY 1; } + +// permutations - MERGE vs MERGE +permutation "s1-begin" "s1-upd-ins" "s2-result" "s1-commit" "s2-result" +permutation "s1-begin" "s1-upd-ins" "s2-begin" "s2-upd-del" "s1-commit" "s2-commit" "s2-result" +permutation "s2-begin" "s2-upd-del" "s1-begin" "s1-upd-ins" "s2-commit" "s1-commit" "s2-result" +permutation "s1-begin" "s1-upd-ins" "s2-begin" "s2-upd" "s1-commit" "s2-commit" "s2-result" +permutation "s2-begin" "s2-ins" "s1-begin" "s1-del" "s2-upd" "s2-result" "s2-commit" "s1-commit" "s2-result" +permutation "s1-begin" "s1-del-ins" "s2-begin" "s2-upd" "s1-result" "s1-ins" "s1-commit" "s2-upd" "s2-commit" "s2-result" diff --git a/src/test/regress/spec/isolation_merge_replicated.spec b/src/test/regress/spec/isolation_merge_replicated.spec new file mode 100644 index 000000000..a586bdfa1 --- /dev/null +++ b/src/test/regress/spec/isolation_merge_replicated.spec @@ -0,0 +1,58 @@ +// +// How we organize this isolation test spec, is explained at README.md file in this directory. 
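Aside on the isolation_merge spec above (and its replicated variant that follows): the permutations only make sense if prept and preps end up colocated, which the setup relies on implicitly by calling create_distributed_table() with the default colocate_with setting and int distribution columns on both tables; that is what lets each MERGE be routed shard-by-shard on prept.t1 = preps.s1 before the MERGE-vs-MERGE and MERGE-vs-UPDATE blocking behaviour is exercised. A minimal sketch, assuming a regress cluster where the spec's setup has run and the tables are visible on the search_path, of how one might double-check that assumption:

    SELECT logicalrelid, colocationid
    FROM pg_dist_partition
    WHERE logicalrelid IN ('prept'::regclass, 'preps'::regclass);
    -- both rows are expected to report the same colocationid

pg_dist_partition and its colocationid column are the same catalog and column that the background_rebalance_parallel test later in this patch queries directly.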
+// + +// create distributed tables to test behavior of MERGE in concurrent operations +setup +{ + DO + $do$ + DECLARE ver int; + BEGIN + SELECT substring(version(), '\d+')::int into ver; + IF (ver < 15) + THEN + RAISE EXCEPTION 'MERGE is not supported on PG versions below 15'; + END IF; + END + $do$; + + SET citus.shard_replication_factor TO 2; + SELECT 1 FROM master_add_node('localhost', 57637); + SELECT 1 FROM master_add_node('localhost', 57638); + + CREATE TABLE prept(t1 int, t2 int); + CREATE TABLE preps(s1 int, s2 int); + SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + INSERT INTO prept VALUES(100, 0); + INSERT INTO preps VALUES(100, 0); + INSERT INTO preps VALUES(200, 0); +} + +// drop distributed tables +teardown +{ + DROP TABLE IF EXISTS prept CASCADE; + DROP TABLE IF EXISTS preps CASCADE; +} + +// session 1 +session "s1" + +step "s1-begin" { BEGIN; } +step "s1-upd-ins" { MERGE INTO prept USING preps ON prept.t1 = preps.s1 + WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 + WHEN NOT MATCHED THEN INSERT VALUES(s1, s2); } +step "s1-result" { SELECT * FROM preps ORDER BY 1; } +step "s1-commit" { COMMIT; } + +// session 2 +session "s2" + +step "s2-begin" { BEGIN; } +step "s2-update" { UPDATE preps SET s2 = s2 + 1; } +step "s2-commit" { COMMIT; } +step "s2-result" { SELECT * FROM prept ORDER BY 1; } + +// permutations - MERGE vs UPDATE (on source) +permutation "s1-begin" "s1-upd-ins" "s2-begin" "s2-update" "s1-commit" "s2-commit" "s1-result" "s2-result" diff --git a/src/test/regress/sql/aggregate_support.sql b/src/test/regress/sql/aggregate_support.sql index a991d856e..bccac491e 100644 --- a/src/test/regress/sql/aggregate_support.sql +++ b/src/test/regress/sql/aggregate_support.sql @@ -364,6 +364,8 @@ $$); set role notsuper; select array_collect_sort(val) from aggdata; reset role; +drop owned by notsuper; +drop user notsuper; -- Test aggregation on coordinator set citus.coordinator_aggregation_strategy to 'row-gather'; @@ -645,5 +647,34 @@ CREATE AGGREGATE newavg ( SELECT run_command_on_workers($$select aggfnoid from pg_aggregate where aggfnoid::text like '%newavg%';$$); +CREATE TYPE coord AS (x int, y int); + +CREATE FUNCTION coord_minx_sfunc(state coord, new coord) +returns coord immutable language plpgsql as $$ +BEGIN + IF (state IS NULL OR new.x < state.x) THEN + RETURN new; + ELSE + RETURN state; + END IF; +END +$$; + +create function coord_minx_finalfunc(state coord) +returns coord immutable language plpgsql as $$ +begin return state; +end; +$$; + +-- custom aggregate that has the same name as a built-in function, but with a combinefunc +create aggregate min (coord) ( + sfunc = coord_minx_sfunc, + stype = coord, + finalfunc = coord_minx_finalfunc, + combinefunc = coord_minx_sfunc +); + +select min((id,val)::coord) from aggdata; + set client_min_messages to error; drop schema aggregate_support cascade; diff --git a/src/test/regress/sql/arbitrary_configs_router.sql b/src/test/regress/sql/arbitrary_configs_router.sql new file mode 100644 index 000000000..f59c5fa4a --- /dev/null +++ b/src/test/regress/sql/arbitrary_configs_router.sql @@ -0,0 +1,634 @@ +SET search_path TO arbitrary_configs_router; + +SET client_min_messages TO WARNING; + +-- test simple select for a single row +SELECT * FROM articles_hash WHERE author_id = 10 AND id = 50; + +-- get all titles by a single author +SELECT title FROM articles_hash WHERE author_id = 10; + +-- try ordering them by word count +SELECT title, word_count FROM articles_hash + WHERE author_id = 10 + ORDER BY 
word_count DESC NULLS LAST; + +-- look at last two articles by an author +SELECT title, id FROM articles_hash + WHERE author_id = 5 + ORDER BY id + LIMIT 2; + +-- find all articles by two authors in same shard +-- but plan is not fast path router plannable due to +-- two distribution columns in the query +SELECT title, author_id FROM articles_hash + WHERE author_id = 7 OR author_id = 8 + ORDER BY author_id ASC, id; + +-- having clause is supported if it goes to a single shard +-- and single dist. key on the query +SELECT author_id, sum(word_count) AS corpus_size FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id + HAVING sum(word_count) > 1000 + ORDER BY sum(word_count) DESC; + +-- fast path planner only support = operator +SELECT * FROM articles_hash WHERE author_id <= 1; +SELECT * FROM articles_hash WHERE author_id IN (1, 3) ORDER BY 1,2,3,4; + +-- queries with CTEs cannot go through fast-path planning +WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) +SELECT * FROM first_author; + +-- two CTE joins also cannot go through fast-path planning +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + +-- this is a different case where each CTE is recursively planned and those goes +-- through the fast-path router planner, but the top level join is not +WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), +id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) +SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; + +-- recursive CTEs are also cannot go through fast +-- path planning +WITH RECURSIVE hierarchy as ( + SELECT *, 1 AS level + FROM company_employees + WHERE company_id = 1 and manager_id = 0 + UNION + SELECT ce.*, (h.level+1) + FROM hierarchy h JOIN company_employees ce + ON (h.employee_id = ce.manager_id AND + h.company_id = ce.company_id AND + ce.company_id = 1)) +SELECT * FROM hierarchy WHERE LEVEL <= 2; + +WITH update_article AS ( + UPDATE articles_hash SET word_count = 10 WHERE id = 1 AND word_count = 9 RETURNING * +) +SELECT * FROM update_article; + +WITH delete_article AS ( + DELETE FROM articles_hash WHERE id = 1 AND word_count = 10 RETURNING * +) +SELECT * FROM delete_article; + +-- grouping sets are supported via fast-path +SELECT + id, substring(title, 2, 1) AS subtitle, count(*) + FROM articles_hash + WHERE author_id = 1 + GROUP BY GROUPING SETS ((id),(subtitle)) + ORDER BY id, subtitle; + +-- queries which involve functions in FROM clause are not supported via fast path planning +SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1; + +-- sublinks are not supported via fast path planning +SELECT * FROM articles_hash +WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2) +ORDER BY articles_hash.id; + +-- subqueries are not supported via fast path planning +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; + +SELECT articles_hash.id,test.word_count +FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test +WHERE test.id = articles_hash.id and articles_hash.author_id = 1 +ORDER BY articles_hash.id; + +-- simple lookup query just works +SELECT * + FROM articles_hash + WHERE author_id = 1; + +-- below query 
hits a single shard but with multiple filters +-- so cannot go via fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 OR author_id = 17; + +-- rename the output columns +SELECT id as article_id, word_count * id as random_value + FROM articles_hash + WHERE author_id = 1; + +-- joins do not go through fast-path planning +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + LIMIT 3; + +-- single shard select with limit goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 3; + +-- single shard select with limit + offset goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + LIMIT 2 + OFFSET 1; + +-- single shard select with limit + offset + order by goes through fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id desc + LIMIT 2 + OFFSET 1; + +-- single shard select with group by on non-partition column goes through fast-path planning +SELECT id + FROM articles_hash + WHERE author_id = 1 + GROUP BY id + ORDER BY id; + +-- single shard select with distinct goes through fast-path planning +SELECT DISTINCT id + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + +-- single shard aggregate goes through fast-path planning +SELECT avg(word_count) + FROM articles_hash + WHERE author_id = 2; + +-- max, min, sum, count goes through fast-path planning +SELECT max(word_count) as max, min(word_count) as min, + sum(word_count) as sum, count(word_count) as cnt + FROM articles_hash + WHERE author_id = 2; + + +-- queries with aggregates and group by goes through fast-path planning +SELECT max(word_count) + FROM articles_hash + WHERE author_id = 1 + GROUP BY author_id; + + +-- set operations are not supported via fast-path planning +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 3 +) AS combination +ORDER BY id; + +-- function calls in the target list is supported via fast path +SELECT LEFT(title, 1) FROM articles_hash WHERE author_id = 1; + + +-- top-level union queries are supported through recursive planning + +-- unions in subqueries are not supported via fast-path planning +SELECT * FROM ( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 1)) uu +ORDER BY 1, 2 +LIMIT 5; + + +-- Test various filtering options for router plannable check + +-- cannot go through fast-path if there is +-- explicit coercion +SELECT * + FROM articles_hash + WHERE author_id = 1::bigint; + +-- can go through fast-path if there is +-- implicit coercion +-- This doesn't work see the related issue +-- reported https://github.com/citusdata/citus/issues/2605 +-- SELECT * +-- FROM articles_hash +-- WHERE author_id = 1.0; + +SELECT * + FROM articles_hash + WHERE author_id = 68719476736; -- this is bigint + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 and author_id >= 1; + +-- cannot go through fast-path due to +-- multiple filters on the dist. key +SELECT * + FROM articles_hash + WHERE author_id = 1 or id = 1; + +-- goes through fast-path planning because +-- the dist. 
key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = 1 or id = 41); + +-- this time there is an OR clause which prevents +-- router planning at all +SELECT * + FROM articles_hash + WHERE author_id = 1 and id = 1 or id = 41; + +-- goes through fast-path planning because +-- the dist. key is ANDed with the rest of the +-- filters +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = random()::int * 0); + +-- not router plannable due to function call on the right side +SELECT * + FROM articles_hash + WHERE author_id = (random()::int * 0 + 1); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(-1); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE 1 = abs(author_id); + +-- Citus does not qualify this as a fast-path because +-- dist_key = func() +SELECT * + FROM articles_hash + WHERE author_id = abs(author_id - 2); + +-- the function is not on the dist. key, so qualify as +-- fast-path +SELECT * + FROM articles_hash + WHERE author_id = 1 and (id = abs(id - 2)); + +-- not router plannable due to is true +SELECT * + FROM articles_hash + WHERE (author_id = 1) is true; + +-- router plannable, (boolean expression) = true is collapsed to (boolean expression) +SELECT * + FROM articles_hash + WHERE (author_id = 1) = true; + +-- some more complex quals +SELECT count(*) FROM articles_hash WHERE (author_id = 15) AND (id = 1 OR word_count > 5); +SELECT count(*) FROM articles_hash WHERE (author_id = 15) OR (id = 1 AND word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) OR (author_id = 1 AND word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 OR word_count > 5); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (author_id = 1 AND (word_count > 5 OR id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 OR author_id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND (word_count > 5 AND author_id = 2)); +SELECT count(*) FROM articles_hash WHERE (id = 15) AND (title ilike 'a%' AND ((word_count > 5 OR title ilike 'b%' ) AND (author_id = 2 AND word_count > 50))); + +-- fast-path router plannable, between operator is on another column +SELECT * + FROM articles_hash + WHERE (author_id = 1) and id between 0 and 20; + +-- fast-path router plannable, partition column expression is and'ed to rest +SELECT * + FROM articles_hash + WHERE (author_id = 1) and (id = 1 or id = 31) and title like '%s'; + +-- fast-path router plannable, order is changed +SELECT * + FROM articles_hash + WHERE (id = 1 or id = 31) and title like '%s' and (author_id = 1); + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1); + +-- fast-path router plannable +SELECT * + FROM articles_hash + WHERE (title like '%s' or title like 'a%') and (author_id = 1) and (word_count < 3000 or word_count > 8000); + +-- window functions are supported with fast-path router plannable +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5; + +SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count + FROM articles_hash + WHERE author_id = 5 + ORDER BY word_count DESC; + +SELECT id, MIN(id) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + +SELECT 
id, word_count, AVG(word_count) over (order by word_count) + FROM articles_hash + WHERE author_id = 1; + +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1; + +-- some more tests on complex target lists +SELECT DISTINCT ON (author_id, id) author_id, id, + MIN(id) over (order by avg(word_count)) * AVG(id * 5.2 + (1.0/max(word_count))) over (order by max(word_count)) as t1, + count(*) FILTER (WHERE title LIKE 'al%') as cnt_with_filter, + count(*) FILTER (WHERE '0300030' LIKE '%3%') as cnt_with_filter_2, + avg(case when id > 2 then char_length(word_count::text) * (id * strpos(word_count::text, '1')) end) as case_cnt, + COALESCE(strpos(avg(word_count)::text, '1'), 20) + FROM articles_hash as aliased_table + WHERE author_id = 1 + GROUP BY author_id, id + HAVING count(DISTINCT title) > 0 + ORDER BY author_id, id, sum(word_count) - avg(char_length(title)) DESC, COALESCE(array_upper(ARRAY[max(id)],1) * 5,0) DESC; + +-- where false queries are router plannable but not fast-path +SELECT * + FROM articles_hash + WHERE false; + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and false; + +-- fast-path with false +SELECT * + FROM articles_hash + WHERE author_id = 1 and 1=0; + +SELECT * + FROM articles_hash + WHERE null and author_id = 1; + +-- we cannot qualify dist_key = X operator Y via +-- fast-path planning +SELECT * + FROM articles_hash + WHERE author_id = 1 + 1; + +-- where false with immutable function returning false +-- goes through fast-path +SELECT * + FROM articles_hash a + WHERE a.author_id = 10 and int4eq(1, 2); + +-- partition_column is null clause does not prune out any shards, +-- all shards remain after shard pruning, not router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id is null; + +-- partition_column equals to null clause prunes out all shards +-- no shards after shard pruning, router plannable +-- not fast-path router either +SELECT * + FROM articles_hash a + WHERE a.author_id = null; + +-- union/difference /intersection with where false +-- this query was not originally router plannable, addition of 1=0 +-- makes it router plannable but not fast-path +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination +ORDER BY id; + +-- same with the above, but with WHERE false +SELECT * FROM ( + SELECT * FROM articles_hash WHERE author_id = 1 + UNION + SELECT * FROM articles_hash WHERE author_id = 2 and 1=0 +) AS combination WHERE false +ORDER BY id; + +-- window functions with where false +SELECT word_count, rank() OVER (PARTITION BY author_id ORDER BY word_count) + FROM articles_hash + WHERE author_id = 1 and 1=0; + +-- complex query hitting a single shard and a fast-path +SELECT + count(DISTINCT CASE + WHEN + word_count > 100 + THEN + id + ELSE + NULL + END) as c + FROM + articles_hash + WHERE + author_id = 5; +-- queries inside transactions can be fast-path router plannable +BEGIN; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +END; + +-- queries inside read-only transactions can be fast-path router plannable +SET TRANSACTION READ ONLY; +SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +END; + +-- cursor queries are fast-path router plannable +BEGIN; +DECLARE test_cursor CURSOR FOR + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; +FETCH test_cursor; +FETCH ALL test_cursor; +FETCH 
test_cursor; -- fetch one row after the last +FETCH BACKWARD test_cursor; +END; + +-- queries inside copy can be router plannable +COPY ( + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id) TO STDOUT; + +-- table creation queries inside can be fast-path router plannable +CREATE TEMP TABLE temp_articles_hash as + SELECT * + FROM articles_hash + WHERE author_id = 1 + ORDER BY id; + +-- fast-path router plannable queries may include filter for aggregates +SELECT count(*), count(*) FILTER (WHERE id < 3) + FROM articles_hash + WHERE author_id = 1; + +-- prepare queries can be router plannable +PREPARE author_1_articles as + SELECT * + FROM articles_hash + WHERE author_id = 1; + +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; +EXECUTE author_1_articles; + +-- parametric prepare queries can be router plannable +PREPARE author_articles(int) as + SELECT * + FROM articles_hash + WHERE author_id = $1; + +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); +EXECUTE author_articles(1); + +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); +EXECUTE author_articles(NULL); + +PREPARE author_articles_update(int) AS + UPDATE articles_hash SET title = 'test' WHERE author_id = $1; + +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); +EXECUTE author_articles_update(NULL); + +-- we don't want too many details. though we're omitting +-- "DETAIL: distribution column value:", we see it acceptable +-- since the query results verifies the correctness +\set VERBOSITY terse + +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); +SELECT author_articles_max_id(); + +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); +SELECT author_articles_max_id(1); + +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); +SELECT * FROM author_articles_id_word_count(); + +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); +SELECT * FROM author_articles_id_word_count(1); + +\set VERBOSITY default + +-- insert .. 
select via coordinator could also +-- use fast-path queries +PREPARE insert_sel(int, int) AS +INSERT INTO articles_hash + SELECT * FROM articles_hash WHERE author_id = $2 AND word_count = $1 OFFSET 0; + +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); +EXECUTE insert_sel(1,1); + +-- one final interesting preperad statement +-- where one of the filters is on the target list +PREPARE fast_path_agg_filter(int, int) AS + SELECT + count(*) FILTER (WHERE word_count=$1) + FROM + articles_hash + WHERE author_id = $2; + +EXECUTE fast_path_agg_filter(1,1); +EXECUTE fast_path_agg_filter(2,2); +EXECUTE fast_path_agg_filter(3,3); +EXECUTE fast_path_agg_filter(4,4); +EXECUTE fast_path_agg_filter(5,5); +EXECUTE fast_path_agg_filter(6,6); + +-- views internally become subqueries, so not fast-path router query +SELECT * FROM test_view; + +-- materialized views can be created for fast-path router plannable queries +CREATE MATERIALIZED VIEW mv_articles_hash_empty AS + SELECT * FROM articles_hash WHERE author_id = 1; +SELECT * FROM mv_articles_hash_empty; + + +SELECT id + FROM articles_hash + WHERE author_id = 1; + +INSERT INTO articles_hash VALUES (51, 1, 'amateus', 1814), (52, 1, 'second amateus', 2824); + +-- verify insert is successfull (not router plannable and executable) +SELECT id + FROM articles_hash + WHERE author_id = 1; + +SELECT count(*) FROM collections_list WHERE key = 4; +SELECT count(*) FROM collections_list_1 WHERE key = 4; +SELECT count(*) FROM collections_list_2 WHERE key = 4; +UPDATE collections_list SET value = 15 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list_1 WHERE key = 4; +SELECT count(*) FILTER (where value = 15) FROM collections_list_2 WHERE key = 4; + +-- test INSERT using values from generate_series() and repeat() functions +INSERT INTO authors_reference (id, name) VALUES (generate_series(1, 10), repeat('Migjeni', 3)); +SELECT * FROM authors_reference ORDER BY 1, 2; diff --git a/src/test/regress/sql/arbitrary_configs_router_create.sql b/src/test/regress/sql/arbitrary_configs_router_create.sql new file mode 100644 index 000000000..956100c7e --- /dev/null +++ b/src/test/regress/sql/arbitrary_configs_router_create.sql @@ -0,0 +1,118 @@ +CREATE SCHEMA arbitrary_configs_router; +SET search_path TO arbitrary_configs_router; + +CREATE TABLE articles_hash ( + id bigint NOT NULL, + author_id bigint NOT NULL, + title varchar(20) NOT NULL, + word_count integer +); + +SELECT create_distributed_table('articles_hash', 'author_id'); + +CREATE TABLE authors_reference (id int, name text); +SELECT create_reference_table('authors_reference'); + +-- create a bunch of test data +INSERT INTO articles_hash VALUES (1, 1, 'arsenous', 9572), (2, 2, 'abducing', 13642),( 3, 3, 'asternal', 10480),( 4, 4, 'altdorfer', 14551),( 5, 5, 'aruru', 11389), + (6, 6, 'atlases', 15459),(7, 7, 'aseptic', 12298),( 8, 8, 'agatized', 16368),(9, 9, 'alligate', 438), + (10, 10, 'aggrandize', 17277),(11, 1, 'alamo', 1347),(12, 2, 'archiblast', 18185), + (13, 3, 'aseyev', 2255),(14, 4, 'andesite', 19094),(15, 5, 'adversa', 3164), + (16, 6, 'allonym', 2),(17, 7, 'auriga', 4073),(18, 8, 'assembly', 911),(19, 9, 'aubergiste', 4981), + (20, 10, 'absentness', 1820),(21, 1, 'arcading', 5890),(22, 2, 'antipope', 2728),(23, 3, 'abhorring', 6799), + (24, 4, 'audacious', 3637),(25, 5, 'antehall', 7707),(26, 6, 'abington', 4545),(27, 7, 
'arsenous', 8616), + (28, 8, 'aerophyte', 5454),(29, 9, 'amateur', 9524),(30, 10, 'andelee', 6363),(31, 1, 'athwartships', 7271), + (32, 2, 'amazon', 11342),(33, 3, 'autochrome', 8180),(34, 4, 'amnestied', 12250),(35, 5, 'aminate', 9089), + (36, 6, 'ablation', 13159),(37, 7, 'archduchies', 9997),(38, 8, 'anatine', 14067),(39, 9, 'anchises', 10906), + (40, 10, 'attemper', 14976),(41, 1, 'aznavour', 11814),(42, 2, 'ausable', 15885),(43, 3, 'affixal', 12723), + (44, 4, 'anteport', 16793),(45, 5, 'afrasia', 864),(46, 6, 'atlanta', 17702),(47, 7, 'abeyance', 1772), + (48, 8, 'alkylic', 18610),(49, 9, 'anyone', 2681),(50, 10, 'anjanette', 19519); + +CREATE TABLE company_employees (company_id int, employee_id int, manager_id int); + +SELECT create_distributed_table('company_employees', 'company_id', 'hash'); + +INSERT INTO company_employees values(1, 1, 0); +INSERT INTO company_employees values(1, 2, 1); +INSERT INTO company_employees values(1, 3, 1); +INSERT INTO company_employees values(1, 4, 2); +INSERT INTO company_employees values(1, 5, 4); + +INSERT INTO company_employees values(3, 1, 0); +INSERT INTO company_employees values(3, 15, 1); +INSERT INTO company_employees values(3, 3, 1); + +-- finally, some tests with partitioned tables +CREATE TABLE collections_list ( + key bigint, + ts timestamptz, + collection_id integer, + value numeric +) PARTITION BY LIST (collection_id ); + +CREATE TABLE collections_list_1 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 1 ); + +CREATE TABLE collections_list_2 + PARTITION OF collections_list (key, ts, collection_id, value) + FOR VALUES IN ( 2 ); + +SELECT create_distributed_table('collections_list', 'key'); +INSERT INTO collections_list SELECT i % 10, now(), (i % 2) + 1, i*i FROM generate_series(0, 50)i; + +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id() RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = 1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; + +-- queries inside plpgsql functions could be router plannable +CREATE OR REPLACE FUNCTION author_articles_max_id(int) RETURNS int AS $$ +DECLARE + max_id integer; +BEGIN + SELECT MAX(id) FROM articles_hash ah + WHERE author_id = $1 + into max_id; + return max_id; +END; +$$ LANGUAGE plpgsql; + +-- check that function returning setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count() RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = 1; + +END; +$$ LANGUAGE plpgsql; + +-- check that function returning setof query are router plannable +CREATE OR REPLACE FUNCTION author_articles_id_word_count(int) RETURNS TABLE(id bigint, word_count int) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT ah.id, ah.word_count + FROM articles_hash ah + WHERE author_id = $1; + +END; +$$ LANGUAGE plpgsql; + +-- Suppress the warning that tells that the view won't be distributed +-- because it depends on a local table. +-- +-- This only happens when running PostgresConfig. 
+SET client_min_messages TO ERROR; +CREATE VIEW test_view AS + SELECT * FROM articles_hash WHERE author_id = 1; diff --git a/src/test/regress/sql/background_rebalance.sql b/src/test/regress/sql/background_rebalance.sql index 4d105655b..59b296576 100644 --- a/src/test/regress/sql/background_rebalance.sql +++ b/src/test/regress/sql/background_rebalance.sql @@ -104,6 +104,8 @@ SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical'); SELECT citus_rebalance_wait(); SELECT state, details from citus_rebalance_status(); +SELECT public.wait_for_resource_cleanup(); + -- Remove coordinator again to allow rerunning of this test SELECT 1 FROM citus_remove_node('localhost', :master_port); SELECT public.wait_until_metadata_sync(30000); diff --git a/src/test/regress/sql/background_rebalance_parallel.sql b/src/test/regress/sql/background_rebalance_parallel.sql new file mode 100644 index 000000000..8c5fb5bb1 --- /dev/null +++ b/src/test/regress/sql/background_rebalance_parallel.sql @@ -0,0 +1,141 @@ +/* + Test to check if the background tasks scheduled by the background rebalancer + has the correct dependencies. +*/ +CREATE SCHEMA background_rebalance_parallel; +SET search_path TO background_rebalance_parallel; +SET citus.next_shard_id TO 85674000; +SET citus.shard_replication_factor TO 1; +SET client_min_messages TO WARNING; + +ALTER SEQUENCE pg_dist_background_job_job_id_seq RESTART 17777; +ALTER SEQUENCE pg_dist_background_task_task_id_seq RESTART 1000; +ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 50050; + +SELECT nextval('pg_catalog.pg_dist_groupid_seq') AS last_group_id_cls \gset +SELECT nextval('pg_catalog.pg_dist_node_nodeid_seq') AS last_node_id_cls \gset +ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART 50; +ALTER SEQUENCE pg_catalog.pg_dist_node_nodeid_seq RESTART 50; + +SELECT 1 FROM master_remove_node('localhost', :worker_1_port); +SELECT 1 FROM master_remove_node('localhost', :worker_2_port); + +SELECT 1 FROM master_add_node('localhost', :worker_1_port); +SELECT 1 FROM master_add_node('localhost', :worker_2_port); + +ALTER SYSTEM SET citus.background_task_queue_interval TO '1s'; +SELECT pg_reload_conf(); + +/* Colocation group 1: create two tables table1_colg1, table2_colg1 and in a colocation group */ +CREATE TABLE table1_colg1 (a int PRIMARY KEY); +SELECT create_distributed_table('table1_colg1', 'a', shard_count => 4 , colocate_with => 'none'); + +CREATE TABLE table2_colg1 (b int PRIMARY KEY); + +SELECT create_distributed_table('table2_colg1', 'b' , colocate_with => 'table1_colg1'); + +/* Colocation group 2: create two tables table1_colg2, table2_colg2 and in a colocation group */ +CREATE TABLE table1_colg2 (a int PRIMARY KEY); + +SELECT create_distributed_table('table1_colg2 ', 'a', shard_count => 4, colocate_with => 'none'); + +CREATE TABLE table2_colg2 (b int primary key); + +SELECT create_distributed_table('table2_colg2', 'b' , colocate_with => 'table1_colg2'); + +/* Colocation group 3: create two tables table1_colg3, table2_colg3 and in a colocation group */ +CREATE TABLE table1_colg3 (a int PRIMARY KEY); + +SELECT create_distributed_table('table1_colg3 ', 'a', shard_count => 4, colocate_with => 'none'); + +CREATE TABLE table2_colg3 (b int primary key); + +SELECT create_distributed_table('table2_colg3', 'b' , colocate_with => 'table1_colg3'); + + +/* Add two new node so that we can rebalance */ +SELECT 1 FROM citus_add_node('localhost', :worker_3_port); +SELECT 1 FROM citus_add_node('localhost', :worker_4_port); + +SELECT * FROM 
get_rebalance_table_shards_plan() ORDER BY shardid; + +SELECT * FROM citus_rebalance_start(); + +SELECT citus_rebalance_wait(); + +/*Check that a move is dependent on + 1. any other move scheduled earlier in its colocation group. + 2. any other move scheduled earlier whose source node or target + node overlaps with the current moves nodes. */ +SELECT S.shardid, P.colocationid +FROM pg_dist_shard S, pg_dist_partition P +WHERE S.logicalrelid = P.logicalrelid ORDER BY S.shardid ASC; + +SELECT D.task_id, + (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), + D.depends_on, + (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) +FROM pg_dist_background_task_depend D WHERE job_id = 17777 ORDER BY D.task_id, D.depends_on ASC; + + +/* Check that if there is a reference table that needs to be synched to a node, + any move without a dependency must depend on the move task for reference table. */ +SELECT 1 FROM citus_drain_node('localhost',:worker_4_port); +SELECT public.wait_for_resource_cleanup(); +SELECT 1 FROM citus_disable_node('localhost', :worker_4_port, synchronous:=true); + +/* Drain worker_3 so that we can move only one colocation group to worker_3 + to create an unbalance that would cause parallel rebalancing. */ +SELECT 1 FROM citus_drain_node('localhost',:worker_3_port); +SELECT citus_set_node_property('localhost', :worker_3_port, 'shouldhaveshards', true); + +CALL citus_cleanup_orphaned_resources(); + +CREATE TABLE ref_table(a int PRIMARY KEY); + +SELECT create_reference_table('ref_table'); + +/* Move all the shards of Colocation group 3 to worker_3.*/ +SELECT +master_move_shard_placement(shardid, 'localhost', nodeport, 'localhost', :worker_3_port, 'block_writes') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'table1_colg3'::regclass AND nodeport <> :worker_3_port +ORDER BY + shardid; + +CALL citus_cleanup_orphaned_resources(); + +/* Activate and new nodes so that we can rebalance. 
*/ +SELECT 1 FROM citus_activate_node('localhost', :worker_4_port); +SELECT citus_set_node_property('localhost', :worker_4_port, 'shouldhaveshards', true); + +SELECT 1 FROM citus_add_node('localhost', :worker_5_port); +SELECT 1 FROM citus_add_node('localhost', :worker_6_port); + +SELECT * FROM citus_rebalance_start(); + +SELECT citus_rebalance_wait(); + +SELECT S.shardid, P.colocationid +FROM pg_dist_shard S, pg_dist_partition P +WHERE S.logicalrelid = P.logicalrelid ORDER BY S.shardid ASC; + +SELECT D.task_id, + (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), + D.depends_on, + (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) +FROM pg_dist_background_task_depend D WHERE job_id = 17778 ORDER BY D.task_id, D.depends_on ASC; + +DROP SCHEMA background_rebalance_parallel CASCADE; +TRUNCATE pg_dist_background_job CASCADE; +SELECT public.wait_for_resource_cleanup(); +select citus_remove_node('localhost', :worker_3_port); +select citus_remove_node('localhost', :worker_4_port); +select citus_remove_node('localhost', :worker_5_port); +select citus_remove_node('localhost', :worker_6_port); +-- keep the rest of the tests inact that depends node/group ids +ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART :last_group_id_cls; +ALTER SEQUENCE pg_catalog.pg_dist_node_nodeid_seq RESTART :last_node_id_cls; + diff --git a/src/test/regress/sql/citus_non_blocking_split_shards.sql b/src/test/regress/sql/citus_non_blocking_split_shards.sql index 11275a342..909beac02 100644 --- a/src/test/regress/sql/citus_non_blocking_split_shards.sql +++ b/src/test/regress/sql/citus_non_blocking_split_shards.sql @@ -53,7 +53,7 @@ SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); CREATE TABLE reference_table (measureid integer PRIMARY KEY); SELECT create_reference_table('reference_table'); -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); @@ -70,9 +70,11 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- BEGIN : Load data into tables. 
INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; + SELECT COUNT(*) FROM sensors; SELECT COUNT(*) FROM reference_table; SELECT COUNT(*) FROM colocated_dist_table; diff --git a/src/test/regress/sql/citus_split_shard_by_split_points.sql b/src/test/regress/sql/citus_split_shard_by_split_points.sql index f5e7f005a..47b28b9d7 100644 --- a/src/test/regress/sql/citus_split_shard_by_split_points.sql +++ b/src/test/regress/sql/citus_split_shard_by_split_points.sql @@ -49,7 +49,7 @@ SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); CREATE TABLE reference_table (measureid integer PRIMARY KEY); SELECT create_reference_table('reference_table'); -CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY, genid integer GENERATED ALWAYS AS ( measureid + 3 ) stored, value varchar(44), col_todrop integer); CLUSTER colocated_dist_table USING colocated_dist_table_pkey; SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); @@ -66,9 +66,11 @@ ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) RE -- BEGIN : Load data into tables. INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; -INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table(measureid, value, col_todrop) SELECT i,'Value',i FROM generate_series(0,1000)i; INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +ALTER TABLE colocated_dist_table DROP COLUMN col_todrop; + SELECT COUNT(*) FROM sensors; SELECT COUNT(*) FROM reference_table; SELECT COUNT(*) FROM colocated_dist_table; diff --git a/src/test/regress/sql/citus_split_shard_by_split_points_deferred_drop.sql b/src/test/regress/sql/citus_split_shard_by_split_points_deferred_drop.sql index c3ca23c88..c72ebfa71 100644 --- a/src/test/regress/sql/citus_split_shard_by_split_points_deferred_drop.sql +++ b/src/test/regress/sql/citus_split_shard_by_split_points_deferred_drop.sql @@ -24,7 +24,7 @@ SET citus.next_placement_id TO 8610000; SET citus.shard_count TO 2; SET citus.shard_replication_factor TO 1; SET citus.next_operation_id TO 777; -SET citus.next_cleanup_record_id TO 511; +ALTER SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq RESTART 511; SET ROLE test_split_role; SET search_path TO "citus_split_shard_by_split_points_deferred_schema"; @@ -51,7 +51,7 @@ SELECT pg_catalog.citus_split_shard_by_split_points( -- The original shard is marked for deferred drop with policy_type = 2. -- The previous shard should be dropped at the beginning of the second split call -SELECT * from pg_dist_cleanup; +SELECT * FROM pg_dist_cleanup WHERE policy_type = 2; -- One of the physical shards should not be deleted, the other one should. \c - - - :worker_1_port @@ -62,7 +62,7 @@ SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind -- Perform deferred drop cleanup. \c - postgres - :master_port -CALL citus_cleanup_orphaned_resources(); +SELECT public.wait_for_resource_cleanup(); -- Clenaup has been done. 
SELECT * from pg_dist_cleanup; diff --git a/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql b/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql new file mode 100644 index 000000000..efd4879bd --- /dev/null +++ b/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql @@ -0,0 +1,282 @@ +-- +-- failure_mx_metadata_sync_multi_trans.sql +-- +CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans; +SET SEARCH_PATH = mx_metadata_sync_multi_trans; +SET citus.shard_count TO 2; +SET citus.next_shard_id TO 16000000; +SET citus.shard_replication_factor TO 1; +SET citus.metadata_sync_mode TO 'nontransactional'; + +SELECT pg_backend_pid() as pid \gset +SELECT citus.mitmproxy('conn.allow()'); + +\set VERBOSITY terse +SET client_min_messages TO ERROR; + +-- Create roles +CREATE ROLE foo1; +CREATE ROLE foo2; + +-- Create sequence +CREATE SEQUENCE seq; + +-- Create colocated distributed tables +CREATE TABLE dist1 (id int PRIMARY KEY default nextval('seq')); +SELECT create_distributed_table('dist1', 'id'); +INSERT INTO dist1 SELECT i FROM generate_series(1,100) i; + +CREATE TABLE dist2 (id int PRIMARY KEY default nextval('seq')); +SELECT create_distributed_table('dist2', 'id'); +INSERT INTO dist2 SELECT i FROM generate_series(1,100) i; + +-- Create a reference table +CREATE TABLE ref (id int UNIQUE); +SELECT create_reference_table('ref'); +INSERT INTO ref SELECT i FROM generate_series(1,100) i; + +-- Create local tables +CREATE TABLE loc1 (id int PRIMARY KEY); +INSERT INTO loc1 SELECT i FROM generate_series(1,100) i; + +CREATE TABLE loc2 (id int REFERENCES loc1(id)); +INSERT INTO loc2 SELECT i FROM generate_series(1,100) i; + +SELECT citus_set_coordinator_host('localhost', :master_port); +SELECT citus_add_local_table_to_metadata('loc1', cascade_via_foreign_keys => true); + +-- Create partitioned distributed table +CREATE TABLE orders ( + id bigint, + order_time timestamp without time zone NOT NULL, + region_id bigint NOT NULL +) +PARTITION BY RANGE (order_time); + +SELECT create_time_partitions( + table_name := 'orders', + partition_interval := '1 day', + start_from := '2020-01-01', + end_at := '2020-01-11' +); +SELECT create_distributed_table('orders', 'region_id'); + +-- Initially turn metadata sync to worker2 off because we'll ingest errors to start/stop metadata sync operations +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +SELECT isactive, metadatasynced, hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + +-- Failure to send local group id +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_local_group SET groupid").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_local_group SET groupid").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to drop node metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_node").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_node").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to send node metadata +SELECT citus.mitmproxy('conn.onQuery(query="INSERT INTO pg_dist_node").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT INTO pg_dist_node").kill()'); +SELECT citus_activate_node('localhost', 
:worker_2_proxy_port); + +-- Failure to drop sequence +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.worker_drop_sequence_dependency").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.worker_drop_sequence_dependency").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to drop shell table +SELECT citus.mitmproxy('conn.onQuery(query="CALL pg_catalog.worker_drop_all_shell_tables").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CALL pg_catalog.worker_drop_all_shell_tables").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_partition metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_partition").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_partition").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_shard metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_shard").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_shard").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_placement metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_placement").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_placement").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_object metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_object").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_object").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_colocation metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_colocation").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_colocation").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to alter or create role +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_create_or_alter_role").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_create_or_alter_role").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to set database owner +SELECT citus.mitmproxy('conn.onQuery(query="ALTER DATABASE.*OWNER TO").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="ALTER DATABASE.*OWNER TO").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Filure to create schema +SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans AUTHORIZATION").cancel(' || 
:pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans AUTHORIZATION").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create sequence +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_sequence_command").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_sequence_command").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create distributed table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.dist1").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.dist1").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create reference table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.ref").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.ref").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create local table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.loc1").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.loc1").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create distributed partitioned table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create distributed partition table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders_p2020_01_05").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders_p2020_01_05").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to attach partition +SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE mx_metadata_sync_multi_trans.orders ATTACH PARTITION mx_metadata_sync_multi_trans.orders_p2020_01_05").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE mx_metadata_sync_multi_trans.orders ATTACH PARTITION mx_metadata_sync_multi_trans.orders_p2020_01_05").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add partition metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_partition_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_partition_metadata").kill()'); +SELECT citus_activate_node('localhost', 
:worker_2_proxy_port); + +-- Failure to add shard metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_shard_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_shard_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add placement metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_placement_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_placement_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add colocation metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.citus_internal_add_colocation_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.citus_internal_add_colocation_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add distributed object metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_object_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_object_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to set isactive to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET isactive = TRUE").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET isactive = TRUE").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to set metadatasynced to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET metadatasynced = TRUE").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET metadatasynced = TRUE").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to set hasmetadata to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET hasmetadata = TRUE").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET hasmetadata = TRUE").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Show node metadata info on coordinator after failures +SELECT * FROM pg_dist_node ORDER BY nodeport; + +-- Show that we can still query the node from coordinator +SELECT COUNT(*) FROM dist1; + +-- Verify that the value 103 belongs to a shard at the node to which we failed to sync metadata +SELECT 103 AS failed_node_val \gset +SELECT nodeid AS failed_nodeid FROM pg_dist_node WHERE metadatasynced = false \gset +SELECT get_shard_id_for_distribution_column('dist1', :failed_node_val) AS shardid \gset +SELECT groupid = :failed_nodeid FROM pg_dist_placement WHERE shardid = :shardid; + +-- Show that we can still insert into a shard at the node from coordinator +INSERT INTO dist1 VALUES (:failed_node_val); + +-- Show that we can still update a shard at the node from coordinator +UPDATE dist1 SET id = 
:failed_node_val WHERE id = :failed_node_val; + +-- Show that we can still delete from a shard at the node from coordinator +DELETE FROM dist1 WHERE id = :failed_node_val; + +-- Show that DDL would still propagate to the node +SET client_min_messages TO NOTICE; +SET citus.log_remote_commands TO 1; +CREATE SCHEMA dummy; +SET citus.log_remote_commands TO 0; +SET client_min_messages TO ERROR; + +-- Successfully activate the node after many failures +SELECT citus.mitmproxy('conn.allow()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +-- Activate the node once more to verify it works again with already synced metadata +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Show node metadata info on worker2 and coordinator after success +\c - - - :worker_2_port +SELECT * FROM pg_dist_node ORDER BY nodeport; +\c - - - :master_port +SELECT * FROM pg_dist_node ORDER BY nodeport; +SELECT citus.mitmproxy('conn.allow()'); + +RESET citus.metadata_sync_mode; +DROP SCHEMA dummy; +DROP SCHEMA mx_metadata_sync_multi_trans CASCADE; +DROP ROLE foo1; +DROP ROLE foo2; +SELECT citus_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql/generated_identity.sql b/src/test/regress/sql/generated_identity.sql index 004f45b40..c2980d0bd 100644 --- a/src/test/regress/sql/generated_identity.sql +++ b/src/test/regress/sql/generated_identity.sql @@ -1,266 +1,235 @@ +-- This test file has an alternative output because error messages vary for PG13 +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int <= 13 AS server_version_le_13; + CREATE SCHEMA generated_identities; SET search_path TO generated_identities; SET client_min_messages to ERROR; +SET citus.shard_replication_factor TO 1; SELECT 1 from citus_add_node('localhost', :master_port, groupId=>0); -DROP TABLE IF EXISTS generated_identities_test; - --- create a partitioned table for testing. 
-CREATE TABLE generated_identities_test ( - a int CONSTRAINT myconname GENERATED BY DEFAULT AS IDENTITY, - b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10), - c smallint GENERATED BY DEFAULT AS IDENTITY, - d serial, - e bigserial, - f smallserial, - g int -) -PARTITION BY RANGE (a); -CREATE TABLE generated_identities_test_1_5 PARTITION OF generated_identities_test FOR VALUES FROM (1) TO (5); -CREATE TABLE generated_identities_test_5_50 PARTITION OF generated_identities_test FOR VALUES FROM (5) TO (50); - --- local tables -SELECT citus_add_local_table_to_metadata('generated_identities_test'); - -\d generated_identities_test - -\c - - - :worker_1_port - -\d generated_identities.generated_identities_test - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -SELECT undistribute_table('generated_identities_test'); - -SELECT citus_remove_node('localhost', :master_port); - -SELECT create_distributed_table('generated_identities_test', 'a'); - -\d generated_identities_test - -\c - - - :worker_1_port - -\d generated_identities.generated_identities_test - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -insert into generated_identities_test (g) values (1); - -insert into generated_identities_test (g) SELECT 2; - -INSERT INTO generated_identities_test (g) -SELECT s FROM generate_series(3,7) s; - -SELECT * FROM generated_identities_test ORDER BY 1; - -SELECT undistribute_table('generated_identities_test'); - -SELECT * FROM generated_identities_test ORDER BY 1; - -\d generated_identities_test - -\c - - - :worker_1_port - -\d generated_identities.generated_identities_test - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -INSERT INTO generated_identities_test (g) -SELECT s FROM generate_series(8,10) s; - -SELECT * FROM generated_identities_test ORDER BY 1; - --- distributed table -SELECT create_distributed_table('generated_identities_test', 'a'); - --- alter table .. alter column .. add is unsupported -ALTER TABLE generated_identities_test ALTER COLUMN g ADD GENERATED ALWAYS AS IDENTITY; - --- alter table .. 
alter column is unsupported -ALTER TABLE generated_identities_test ALTER COLUMN b TYPE int; - -SELECT alter_distributed_table('generated_identities_test', 'g'); - -SELECT alter_distributed_table('generated_identities_test', 'b'); - -SELECT alter_distributed_table('generated_identities_test', 'c'); - -SELECT undistribute_table('generated_identities_test'); - -SELECT * FROM generated_identities_test ORDER BY g; - --- reference table - -DROP TABLE generated_identities_test; - -CREATE TABLE generated_identities_test ( - a int GENERATED BY DEFAULT AS IDENTITY, - b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10), - c smallint GENERATED BY DEFAULT AS IDENTITY, - d serial, - e bigserial, - f smallserial, - g int +-- smallint identity column can not be distributed +CREATE TABLE smallint_identity_column ( + a smallint GENERATED BY DEFAULT AS IDENTITY ); +SELECT create_distributed_table('smallint_identity_column', 'a'); +SELECT create_distributed_table_concurrently('smallint_identity_column', 'a'); +SELECT create_reference_table('smallint_identity_column'); +SELECT citus_add_local_table_to_metadata('smallint_identity_column'); -SELECT create_reference_table('generated_identities_test'); +DROP TABLE smallint_identity_column; -\d generated_identities_test +-- int identity column can not be distributed +CREATE TABLE int_identity_column ( + a int GENERATED BY DEFAULT AS IDENTITY +); +SELECT create_distributed_table('int_identity_column', 'a'); +SELECT create_distributed_table_concurrently('int_identity_column', 'a'); +SELECT create_reference_table('int_identity_column'); +SELECT citus_add_local_table_to_metadata('int_identity_column'); +DROP TABLE int_identity_column; +RESET citus.shard_replication_factor; + + +CREATE TABLE bigint_identity_column ( + a bigint GENERATED BY DEFAULT AS IDENTITY, + b int +); +SELECT citus_add_local_table_to_metadata('bigint_identity_column'); +DROP TABLE bigint_identity_column; + +CREATE TABLE bigint_identity_column ( + a bigint GENERATED BY DEFAULT AS IDENTITY, + b int +); +SELECT create_distributed_table('bigint_identity_column', 'a'); + +\d bigint_identity_column \c - - - :worker_1_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; -\d generated_identities.generated_identities_test +INSERT INTO bigint_identity_column (b) +SELECT s FROM generate_series(1,10) s; + +\d generated_identities.bigint_identity_column \c - - - :master_port SET search_path TO generated_identities; SET client_min_messages to ERROR; -INSERT INTO generated_identities_test (g) +INSERT INTO bigint_identity_column (b) SELECT s FROM generate_series(11,20) s; -SELECT * FROM generated_identities_test ORDER BY g; +SELECT * FROM bigint_identity_column ORDER BY B ASC; -SELECT undistribute_table('generated_identities_test'); +-- table with identity column cannot be altered. +SELECT alter_distributed_table('bigint_identity_column', 'b'); -\d generated_identities_test +-- table with identity column cannot be undistributed. +SELECT undistribute_table('bigint_identity_column'); + +DROP TABLE bigint_identity_column; + +-- create a partitioned table for testing. 
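+-- Both identity columns below are bigint: a (the distribution column) uses GENERATED BY DEFAULT
+-- and b uses GENERATED ALWAYS; the partitions are created before distribution, and inserts are
+-- exercised from both the coordinator and a worker.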
+CREATE TABLE partitioned_table ( + a bigint CONSTRAINT myconname GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 10), + b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10), + c int +) +PARTITION BY RANGE (c); +CREATE TABLE partitioned_table_1_50 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (50); +CREATE TABLE partitioned_table_50_500 PARTITION OF partitioned_table FOR VALUES FROM (50) TO (1000); + +SELECT create_distributed_table('partitioned_table', 'a'); + +\d partitioned_table \c - - - :worker_1_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; -\d generated_identities.generated_identities_test +\d generated_identities.partitioned_table + +insert into partitioned_table (c) values (1); + +insert into partitioned_table (c) SELECT 2; + +INSERT INTO partitioned_table (c) +SELECT s FROM generate_series(3,7) s; \c - - - :master_port SET search_path TO generated_identities; SET client_min_messages to ERROR; +INSERT INTO partitioned_table (c) +SELECT s FROM generate_series(10,20) s; + +INSERT INTO partitioned_table (a,c) VALUES (998,998); + +INSERT INTO partitioned_table (a,b,c) OVERRIDING SYSTEM VALUE VALUES (999,999,999); + +SELECT * FROM partitioned_table ORDER BY c ASC; + +-- alter table .. alter column .. add is unsupported +ALTER TABLE partitioned_table ALTER COLUMN g ADD GENERATED ALWAYS AS IDENTITY; + +-- alter table .. alter column is unsupported +ALTER TABLE partitioned_table ALTER COLUMN b TYPE int; + +DROP TABLE partitioned_table; + +-- create a table for reference table testing. +CREATE TABLE reference_table ( + a bigint CONSTRAINT myconname GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 10), + b bigint GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 10) UNIQUE, + c int +); + +SELECT create_reference_table('reference_table'); + +\d reference_table + +\c - - - :worker_1_port +SET search_path TO generated_identities; + +\d generated_identities.reference_table + +INSERT INTO reference_table (c) +SELECT s FROM generate_series(1,10) s; + +--on master +select * from reference_table; + +\c - - - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; + +INSERT INTO reference_table (c) +SELECT s FROM generate_series(11,20) s; + +SELECT * FROM reference_table ORDER BY c ASC; + +DROP TABLE reference_table; + +CREATE TABLE color ( + color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, + color_name VARCHAR NOT NULL +); + +-- https://github.com/citusdata/citus/issues/6694 +CREATE USER identity_test_user; +GRANT INSERT ON color TO identity_test_user; +GRANT USAGE ON SCHEMA generated_identities TO identity_test_user; + +SET ROLE identity_test_user; +SELECT create_distributed_table('color', 'color_id'); + +SET ROLE postgres; +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table_concurrently('color', 'color_id'); +RESET citus.shard_replication_factor; + +\c - identity_test_user - :worker_1_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; + +INSERT INTO color(color_name) VALUES ('Blue'); + +\c - postgres - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; +SET citus.next_shard_id TO 12400000; + +DROP TABLE Color; +CREATE TABLE color ( + color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, + color_name VARCHAR NOT NULL +) USING columnar; +SELECT create_distributed_table('color', 'color_id'); +INSERT INTO color(color_name) VALUES ('Blue'); +\d+ color + +\c - - - :worker_1_port +SET 
search_path TO generated_identities; +\d+ color +INSERT INTO color(color_name) VALUES ('Red'); +-- alter sequence .. restart +ALTER SEQUENCE color_color_id_seq RESTART WITH 1000; +-- override system value +INSERT INTO color(color_id, color_name) VALUES (1, 'Red'); +INSERT INTO color(color_id, color_name) VALUES (NULL, 'Red'); +INSERT INTO color(color_id, color_name) OVERRIDING SYSTEM VALUE VALUES (1, 'Red'); +-- update null or custom value +UPDATE color SET color_id = NULL; +UPDATE color SET color_id = 1; + +\c - postgres - :master_port +SET search_path TO generated_identities; +SET client_min_messages to ERROR; + + -- alter table .. add column .. GENERATED .. AS IDENTITY -DROP TABLE IF EXISTS color; -CREATE TABLE color ( - color_name VARCHAR NOT NULL -); -SELECT create_distributed_table('color', 'color_name'); ALTER TABLE color ADD COLUMN color_id BIGINT GENERATED ALWAYS AS IDENTITY; -INSERT INTO color(color_name) VALUES ('Red'); -ALTER TABLE color ADD COLUMN color_id_1 BIGINT GENERATED ALWAYS AS IDENTITY; -DROP TABLE color; --- insert data from workers -CREATE TABLE color ( - color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, - color_name VARCHAR NOT NULL -); -SELECT create_distributed_table('color', 'color_id'); +-- alter sequence .. restart +ALTER SEQUENCE color_color_id_seq RESTART WITH 1000; +-- override system value +INSERT INTO color(color_id, color_name) VALUES (1, 'Red'); +INSERT INTO color(color_id, color_name) VALUES (NULL, 'Red'); +INSERT INTO color(color_id, color_name) OVERRIDING SYSTEM VALUE VALUES (1, 'Red'); +-- update null or custom value +UPDATE color SET color_id = NULL; +UPDATE color SET color_id = 1; -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -INSERT INTO color(color_name) VALUES ('Red'); - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -SELECT undistribute_table('color'); -SELECT create_distributed_table('color', 'color_id'); - -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -INSERT INTO color(color_name) VALUES ('Red'); - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -INSERT INTO color(color_name) VALUES ('Red'); - -SELECT count(*) from color; - --- modify sequence & alter table -DROP TABLE color; - -CREATE TABLE color ( - color_id BIGINT GENERATED ALWAYS AS IDENTITY UNIQUE, - color_name VARCHAR NOT NULL -); -SELECT create_distributed_table('color', 'color_id'); - -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -INSERT INTO color(color_name) VALUES ('Red'); - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -SELECT undistribute_table('color'); - -ALTER SEQUENCE color_color_id_seq RENAME TO myseq; - -SELECT create_distributed_table('color', 'color_id'); -\ds+ myseq -\ds+ color_color_id_seq -\d color - -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -\ds+ myseq -\ds+ color_color_id_seq -\d color - -INSERT INTO color(color_name) VALUES ('Red'); - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -ALTER SEQUENCE myseq RENAME TO color_color_id_seq; - -\ds+ myseq -\ds+ color_color_id_seq - -INSERT INTO color(color_name) VALUES ('Red'); - -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to 
ERROR; - -\ds+ myseq -\ds+ color_color_id_seq -\d color - -INSERT INTO color(color_name) VALUES ('Red'); - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -SELECT alter_distributed_table('co23423lor', shard_count := 6); - -INSERT INTO color(color_name) VALUES ('Red'); - -\c - - - :worker_1_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; - -\ds+ color_color_id_seq - -INSERT INTO color(color_name) VALUES ('Red'); - -\c - - - :master_port -SET search_path TO generated_identities; -SET client_min_messages to ERROR; +DROP TABLE IF EXISTS test; +CREATE TABLE test (x int, y int, z bigint generated by default as identity); +SELECT create_distributed_table('test', 'x', colocate_with := 'none'); +INSERT INTO test VALUES (1,2); +INSERT INTO test SELECT x, y FROM test WHERE x = 1; +SELECT * FROM test; DROP SCHEMA generated_identities CASCADE; +DROP USER identity_test_user; diff --git a/src/test/regress/sql/local_shard_execution.sql b/src/test/regress/sql/local_shard_execution.sql index 5e678e02a..4bf5aeec4 100644 --- a/src/test/regress/sql/local_shard_execution.sql +++ b/src/test/regress/sql/local_shard_execution.sql @@ -310,7 +310,7 @@ ROLLBACK; -- make sure that everything is rollbacked SELECT * FROM distributed_table WHERE key = 1 ORDER BY 1,2,3; SELECT count(*) FROM second_distributed_table; -SELECT * FROM second_distributed_table; +SELECT * FROM second_distributed_table ORDER BY 1; -- very simple examples, an SELECTs should see the modifications -- that has done before @@ -931,6 +931,7 @@ RESET client_min_messages; RESET citus.log_local_commands; \c - - - :master_port +SET search_path TO local_shard_execution; SET citus.next_shard_id TO 1480000; -- test both local and remote execution with custom type SET citus.shard_replication_factor TO 1; @@ -1063,7 +1064,9 @@ EXECUTE router_select_with_no_dist_key_filter('yes'); TRUNCATE event_responses; CREATE OR REPLACE PROCEDURE register_for_event(p_event_id int, p_user_id int, p_choice invite_resp) -LANGUAGE plpgsql AS $fn$ +LANGUAGE plpgsql +SET search_path TO local_shard_execution +AS $fn$ BEGIN INSERT INTO event_responses VALUES (p_event_id, p_user_id, p_choice) ON CONFLICT (event_id, user_id) @@ -1094,6 +1097,7 @@ CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); \c - - - :worker_2_port +SET search_path TO local_shard_execution; CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); CALL register_for_event(16, 1, 'yes'); @@ -1426,6 +1430,7 @@ SELECT count(*) FROM pg_dist_transaction; SELECT recover_prepared_transactions(); \c - - - :master_port +SET search_path TO local_shard_execution; -- verify the local_hostname guc is used for local executions that should connect to the -- local host diff --git a/src/test/regress/sql/local_shard_execution_replicated.sql b/src/test/regress/sql/local_shard_execution_replicated.sql index 3a15e52de..89a4c61f5 100644 --- a/src/test/regress/sql/local_shard_execution_replicated.sql +++ b/src/test/regress/sql/local_shard_execution_replicated.sql @@ -273,7 +273,7 @@ ROLLBACK; -- make sure that everything is rollbacked SELECT * FROM distributed_table WHERE key = 1 ORDER BY 1,2,3; SELECT count(*) FROM second_distributed_table; -SELECT * FROM second_distributed_table; +SELECT * FROM second_distributed_table ORDER BY 1; -- very simple examples, an SELECTs should see the modifications -- that has done before diff --git a/src/test/regress/sql/logical_replication.sql 
b/src/test/regress/sql/logical_replication.sql index f155aaa49..e78b0a393 100644 --- a/src/test/regress/sql/logical_replication.sql +++ b/src/test/regress/sql/logical_replication.sql @@ -21,7 +21,9 @@ SELECT 1 from citus_add_node('localhost', :master_port, groupId := 0); -- This allows us to test the cleanup logic at the start of the shard move. \c - - - :worker_1_port SET search_path TO logical_replication; +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION citus_shard_move_publication_:postgres_oid FOR TABLE dist_6830000; +RESET citus.enable_ddl_propagation; \c - - - :master_port SET search_path TO logical_replication; @@ -72,6 +74,9 @@ SELECT count(*) from pg_publication; SELECT count(*) from pg_replication_slots; SELECT count(*) from dist; +DROP PUBLICATION citus_shard_move_publication_:postgres_oid; +SELECT pg_drop_replication_slot('citus_shard_move_slot_' || :postgres_oid); + \c - - - :worker_2_port SET search_path TO logical_replication; @@ -88,3 +93,4 @@ ALTER SUBSCRIPTION citus_shard_move_subscription_:postgres_oid DISABLE; ALTER SUBSCRIPTION citus_shard_move_subscription_:postgres_oid SET (slot_name = NONE); DROP SUBSCRIPTION citus_shard_move_subscription_:postgres_oid; DROP SCHEMA logical_replication CASCADE; +SELECT public.wait_for_resource_cleanup(); diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql index c266b5333..d663491ae 100644 --- a/src/test/regress/sql/merge.sql +++ b/src/test/regress/sql/merge.sql @@ -18,7 +18,9 @@ CREATE SCHEMA merge_schema; SET search_path TO merge_schema; SET citus.shard_count TO 4; SET citus.next_shard_id TO 4000000; -SET citus.explain_all_tasks to true; +SET citus.explain_all_tasks TO true; +SET citus.shard_replication_factor TO 1; +SET citus.max_adaptive_executor_pool_size TO 1; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); CREATE TABLE source @@ -143,9 +145,33 @@ SELECT undistribute_table('source'); SELECT create_distributed_table('target', 'customer_id'); SELECT create_distributed_table('source', 'customer_id'); +-- Updates one of the row with customer_id = 30002 +SELECT * from target t WHERE t.customer_id = 30002; +-- Turn on notice to print tasks sent to nodes +SET citus.log_remote_commands to true; MERGE INTO target t USING source s - ON (t.customer_id = s.customer_id) + ON (t.customer_id = s.customer_id) AND t.customer_id = 30002 + + WHEN MATCHED AND t.order_center = 'XX' THEN + DELETE + + WHEN MATCHED THEN + UPDATE SET -- Existing customer, update the order count and last_order_id + order_count = t.order_count + 1, + last_order_id = s.order_id + + WHEN NOT MATCHED THEN + DO NOTHING; + +SET citus.log_remote_commands to false; +SELECT * from target t WHERE t.customer_id = 30002; + +-- Deletes one of the row with customer_id = 30004 +SELECT * from target t WHERE t.customer_id = 30004; +MERGE INTO target t + USING source s + ON (t.customer_id = s.customer_id) AND t.customer_id = 30004 WHEN MATCHED AND t.order_center = 'XX' THEN DELETE @@ -158,6 +184,22 @@ MERGE INTO target t WHEN NOT MATCHED THEN -- New entry, record it. 
INSERT (customer_id, last_order_id, order_center, order_count, last_order) VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time); +SELECT * from target t WHERE t.customer_id = 30004; + +-- Updating distribution column is allowed if the operation is a no-op +SELECT * from target t WHERE t.customer_id = 30000; +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = 30000; + +MERGE INTO target t +USING SOURCE s +ON (t.customer_id = s.customer_id AND t.customer_id = 30000) +WHEN MATCHED THEN + UPDATE SET customer_id = t.customer_id; +SELECT * from target t WHERE t.customer_id = 30000; -- -- Test MERGE with CTE as source @@ -197,7 +239,6 @@ MERGE INTO t1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (pg_res.id, pg_res.val); --- Two rows with id 2 and val incremented, id 3, and id 1 is deleted SELECT * FROM t1 order by id; SELECT * INTO merge_result FROM t1 order by id; @@ -243,11 +284,13 @@ SELECT create_distributed_table('t1', 'id'); SELECT create_distributed_table('s1', 'id'); +SELECT * FROM t1 order by id; +SET citus.log_remote_commands to true; WITH s1_res AS ( SELECT * FROM s1 ) MERGE INTO t1 - USING s1_res ON (s1_res.id = t1.id) + USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6 WHEN MATCHED AND s1_res.val = 0 THEN DELETE @@ -255,6 +298,9 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); +SET citus.log_remote_commands to false; +-- Other than id 6 everything else is a NO match, and should appear in target +SELECT * FROM t1 order by 1, 2; -- -- Test with multiple join conditions @@ -325,15 +371,21 @@ SELECT undistribute_table('s2'); SELECT create_distributed_table('t2', 'id'); SELECT create_distributed_table('s2', 'id'); +SELECT * FROM t2 ORDER BY 1; +SET citus.log_remote_commands to true; MERGE INTO t2 USING s2 -ON t2.id = s2.id AND t2.src = s2.src +ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4 WHEN MATCHED AND t2.val = 1 THEN UPDATE SET val = s2.val + 10 WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN - INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src); + DO NOTHING; +SET citus.log_remote_commands to false; +-- Row with id = 4 is a match for delete clause, row should be deleted +-- Row with id = 3 is a NO match, row from source will be inserted +SELECT * FROM t2 ORDER BY 1; -- -- With sub-query as the MERGE source @@ -740,7 +792,8 @@ $$ language plpgsql volatile; CREATE TABLE fn_target(id int, data varchar); MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -753,11 +806,12 @@ SELECT * INTO fn_result FROM fn_target ORDER BY 1 ; -- Clean the slate TRUNCATE TABLE fn_target; SELECT citus_add_local_table_to_metadata('fn_target'); -SELECT create_distributed_table('dist_table', 'id'); +SELECT citus_add_local_table_to_metadata('dist_table'); SET client_min_messages TO DEBUG1; MERGE INTO fn_target -USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source +USING (SELECT id, source FROM dist_table) as fn_source ON fn_source.id = fn_target.id WHEN MATCHED THEN DO NOTHING @@ -824,10 +878,761 @@ RESET client_min_messages; SELECT * FROM ft_target; +-- +-- complex joins on the source side +-- + +-- 
source(join of two relations) relation is an unaliased join + +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); + +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); + +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); + +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET tid = sid2, src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +-- Gold result to compare against +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test the same scenarios with distributed tables + +SELECT create_distributed_table('target_cj', 'tid'); +SELECT create_distributed_table('source_cj1', 'sid1'); +SELECT create_distributed_table('source_cj2', 'sid2'); + +BEGIN; +SET citus.log_remote_commands to true; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SET citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- sub-query as a source +BEGIN; +MERGE INTO target_cj t +USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub +ON t.tid = sub.sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = sub.src1, val = val1 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test self-join +BEGIN; +SELECT * FROM target_cj ORDER BY 1; +set citus.log_remote_commands to true; +MERGE INTO target_cj t1 +USING (SELECT * FROM target_cj) sub +ON t1.tid = sub.tid AND t1.tid = 3 +WHEN MATCHED THEN + UPDATE SET src = sub.src, val = sub.val + 100 +WHEN NOT MATCHED THEN + DO NOTHING; +set citus.log_remote_commands to false; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + + +-- Test PREPARE +PREPARE foo(int) AS +MERGE INTO target_cj target +USING (SELECT * FROM source_cj1) sub +ON target.tid = sub.sid1 AND target.tid = $1 +WHEN MATCHED THEN + UPDATE SET val = sub.val1 +WHEN NOT MATCHED THEN + DO NOTHING; + +SELECT * FROM target_cj ORDER BY 1; + +BEGIN; +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +EXECUTE foo(2); +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; + +SET citus.log_remote_commands to true; +SET client_min_messages TO DEBUG1; +EXECUTE foo(2); +RESET client_min_messages; + +EXECUTE foo(2); +SET citus.log_remote_commands to false; + +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test distributed tables, must be co-located and joined on distribution column. 
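+-- The comparison tests that follow create citus_target and citus_source with the same
+-- distribution column (id), so they are colocated by default, and every MERGE joins them
+-- on that column.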
+ +-- +-- We create two sets of source and target tables, one set is Postgres and the other +-- is Citus distributed. Run the _exact_ MERGE SQL on both the sets and compare the +-- final results of target tables of Postgres and Citus, the result should match. +-- This is repeated for various MERGE SQL combinations +-- +CREATE TABLE pg_target(id int, val varchar); +CREATE TABLE pg_source(id int, val varchar); +CREATE TABLE citus_target(id int, val varchar); +CREATE TABLE citus_source(id int, val varchar); + +-- Half of the source rows do not match +INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i; + +INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i; +INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i; + +SELECT create_distributed_table('citus_target', 'id'); +SELECT create_distributed_table('citus_source', 'id'); + +-- +-- This routine compares the target tables of Postgres and Citus and +-- returns true if they match, false if the results do not match. +-- +CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_target + FULL OUTER JOIN citus_target + USING (id, val) + WHERE pg_target.id IS NULL + OR citus_target.id IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; + +-- Make sure we start with exact data in Postgres and Citus +SELECT compare_tables(); + +-- Run the MERGE on both Postgres and Citus, and compare the final target tables + +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- ON clause filter on source +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- ON clause filter on target +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND t.id < 100 +WHEN MATCHED AND t.id > 400 THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- NOT MATCHED clause filter on source +-- +BEGIN; +SET 
citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN MATCHED THEN + DO NOTHING +WHEN NOT MATCHED AND s.id < 100 THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- +-- Test constant filter in ON clause to check if shards are pruned +-- with restriction information +-- + +-- +-- Though constant filter is present, this won't prune shards as +-- NOT MATCHED clause is present +-- +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + INSERT VALUES(s.id, s.val); + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- This will prune shards with restriction information as NOT MATCHED is void +BEGIN; +SET citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING pg_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id AND s.id = 250 +WHEN MATCHED THEN + UPDATE SET val = t.val || 'Updated by Merge' +WHEN NOT MATCHED THEN + DO NOTHING; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test CTE with distributed tables +CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400; +CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400; + +BEGIN; +SEt citus.log_remote_commands to true; + +WITH cte AS ( + SELECT * FROM pg_source_view +) +MERGE INTO pg_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +WITH cte AS ( + SELECT * FROM citus_source_view +) +MERGE INTO citus_target t +USING cte +ON cte.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by CTE' +WHEN NOT MATCHED THEN + INSERT VALUES (cte.id, cte.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + + +-- Test sub-query with distributed tables +BEGIN; +SEt citus.log_remote_commands to true; + +MERGE INTO pg_target t +USING (SELECT * FROM pg_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +MERGE INTO citus_target t +USING (SELECT * FROM citus_source) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated by subquery' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, subq.val) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +SET citus.log_remote_commands to false; +SELECT compare_tables(); +ROLLBACK; + +-- Test PREPARE +PREPARE pg_prep(int) AS +MERGE INTO pg_target +USING (SELECT * FROM pg_source) sub +ON pg_target.id = sub.id AND pg_target.id = $1 +WHEN MATCHED THEN + UPDATE 
SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + INSERT VALUES (sub.id, sub.val); + +PREPARE citus_prep(int) AS +MERGE INTO citus_target +USING (SELECT * FROM citus_source) sub +ON citus_target.id = sub.id AND citus_target.id = $1 +WHEN MATCHED THEN + UPDATE SET val = 'Updated by prepare using ' || sub.val +WHEN NOT MATCHED THEN + INSERT VALUES (sub.id, sub.val); + +BEGIN; + +SELECT * FROM pg_target WHERE id = 500; -- before merge +SELECT count(*) FROM pg_target; -- before merge +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- non-cached +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +EXECUTE pg_prep(500); +SELECT * FROM pg_target WHERE id = 500; -- cached +SELECT count(*) FROM pg_target; -- cached + +SELECT * FROM citus_target WHERE id = 500; -- before merge +SELECT count(*) FROM citus_target; -- before merge +SET citus.log_remote_commands to true; +EXECUTE citus_prep(500); +SELECT * FROM citus_target WHERE id = 500; -- non-cached +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +EXECUTE citus_prep(500); +SET citus.log_remote_commands to false; +SELECT * FROM citus_target WHERE id = 500; -- cached +SELECT count(*) FROM citus_target; -- cached + +SELECT compare_tables(); +ROLLBACK; + +-- Test partitions + distributed tables + +CREATE TABLE pg_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); +CREATE TABLE citus_pa_target (tid integer, balance float, val text) + PARTITION BY LIST (tid); + +CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT + WITH (autovacuum_enabled=off); +CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4) + WITH (autovacuum_enabled=off); +CREATE TABLE part6 PARTITION OF citus_pa_target FOR VALUES IN (2,5,6) + WITH (autovacuum_enabled=off); +CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9) + WITH (autovacuum_enabled=off); +CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT + WITH (autovacuum_enabled=off); + +CREATE TABLE pg_pa_source (sid integer, delta float); +CREATE TABLE citus_pa_source (sid integer, delta float); + +-- insert many rows to the source table +INSERT INTO pg_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id; +-- insert a few rows in the target table (odd numbered tid) +INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; +INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id; + +SELECT create_distributed_table('citus_pa_target', 'tid'); +SELECT create_distributed_table('citus_pa_source', 'sid'); + +CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$ +DECLARE ret BOOL; +BEGIN +SELECT count(1) = 0 INTO ret + FROM pg_pa_target + FULL OUTER JOIN citus_pa_target + USING (tid, balance, val) + WHERE pg_pa_target.tid IS NULL + OR citus_pa_target.tid IS NULL; +RETURN ret; +END +$$ LANGUAGE PLPGSQL; + +-- try simple MERGE +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' 
updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +SELECT pa_compare_tables(); +ROLLBACK; + +-- same with a constant qual +BEGIN; +MERGE INTO pg_pa_target t + USING pg_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +MERGE INTO citus_pa_target t + USING citus_pa_source s + ON t.tid = s.sid AND tid = 1 + WHEN MATCHED THEN + UPDATE SET balance = balance + delta, val = val || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid, delta, 'inserted by merge'); + +SELECT pa_compare_tables(); +ROLLBACK; + +CREATE TABLE source_json( id integer, z int, d jsonb); +CREATE TABLE target_json( id integer, z int, d jsonb); + +INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i; + +SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id'); + +-- single shard query given source_json is filtered and Postgres is smart to pushdown +-- filter to the target_json as well +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING (SELECT * FROM source_json WHERE id = 1) sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); +$Q$); +SELECT * FROM target_json ORDER BY 1; + +-- zero shard query as filters do not match +--SELECT public.coordinator_plan($Q$ +--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +--USING (SELECT * FROM source_json WHERE id = 1) sdn +--ON sda.id = sdn.id AND sda.id = 2 +--WHEN NOT matched THEN +-- INSERT (id, z) VALUES (sdn.id, 5); +--$Q$); +--SELECT * FROM target_json ORDER BY 1; + +-- join for source_json is happening at a different place +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z) +ON sda.id = s1.id AND s1.id = s2.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (s2.id, 5); +$Q$); +SELECT * FROM target_json ORDER BY 1; + +-- update JSON column +SELECT public.coordinator_plan($Q$ +EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET d = '{"a" : 5}'; +$Q$); +SELECT * FROM target_json ORDER BY 1; + +CREATE FUNCTION immutable_hash(int) RETURNS int +AS 'SELECT hashtext( ($1 + $1)::text);' +LANGUAGE SQL +IMMUTABLE +RETURNS NULL ON NULL INPUT; + +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN matched THEN + UPDATE SET z = immutable_hash(sdn.z); + +-- Test bigserial +CREATE TABLE source_serial (id integer, z int, d bigserial); +CREATE TABLE target_serial (id integer, z int, d bigserial); +INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i; +SELECT create_distributed_table('source_serial', 'id'), + create_distributed_table('target_serial', 'id'); + +MERGE INTO target_serial sda +USING source_serial sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (id, z); + +SELECT count(*) from source_serial; +SELECT count(*) from target_serial; + +SELECT count(distinct d) from source_serial; +SELECT count(distinct d) from target_serial; + +-- Test set 
operations +CREATE TABLE target_set(t1 int, t2 int); +CREATE TABLE source_set(s1 int, s2 int); + +SELECT create_distributed_table('target_set', 't1'), + create_distributed_table('source_set', 's1'); + +INSERT INTO target_set VALUES(1, 0); +INSERT INTO source_set VALUES(1, 1); +INSERT INTO source_set VALUES(2, 2); + +MERGE INTO target_set +USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 100 +WHEN NOT MATCHED THEN + INSERT VALUES(foo.s1); +SELECT * FROM target_set ORDER BY 1, 2; + -- -- Error and Unsupported scenarios -- +MERGE INTO target_set +USING source_set AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET ctid = '(0,100)'; + +MERGE INTO target_set +USING (SELECT s1,s2 FROM source_set UNION SELECT s2,s1 FROM source_set) AS foo ON target_set.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +MERGE INTO target_set +USING (SELECT 2 as s3, source_set.* FROM (SELECT * FROM source_set LIMIT 1) as foo LEFT JOIN source_set USING( s1)) AS foo +ON target_set.t1 = foo.s1 +WHEN MATCHED THEN UPDATE SET t2 = t2 + 1 +WHEN NOT MATCHED THEN INSERT VALUES(s1, s3); + + +-- modifying CTE not supported +EXPLAIN +WITH cte_1 AS (DELETE FROM target_json) +MERGE INTO target_json sda +USING source_json sdn +ON sda.id = sdn.id +WHEN NOT matched THEN + INSERT (id, z) VALUES (sdn.id, 5); + +-- Grouping sets not supported +MERGE INTO citus_target t +USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +WITH subq AS +( +SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val) +) +MERGE INTO citus_target t +USING subq +ON subq.id = t.id +WHEN MATCHED AND t.id > 350 THEN + UPDATE SET val = t.val || 'Updated' +WHEN NOT MATCHED THEN + INSERT VALUES (subq.id, 99) +WHEN MATCHED AND t.id < 350 THEN + DELETE; + +-- try inserting unmatched distribution column value +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT DEFAULT VALUES; + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT VALUES(10000); + +MERGE INTO citus_target t +USING citus_source s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(1000); + +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (id) VALUES(s.val); + +MERGE INTO t1 t +USING s1 s +ON t.id = s.id +WHEN NOT MATCHED THEN + INSERT (val) VALUES(s.val); + +-- try updating the distribution key column +BEGIN; +MERGE INTO target_cj t + USING source_cj1 s + ON t.tid = s.sid1 AND t.tid = 2 + WHEN MATCHED THEN + UPDATE SET tid = tid + 9, src = src || ' updated by merge' + WHEN NOT MATCHED THEN + INSERT VALUES (sid1, 'inserted by merge', val1); +ROLLBACK; + -- Foreign table as target MERGE INTO foreign_table USING ft_target ON (foreign_table.id = ft_target.id) @@ -854,6 +1659,57 @@ MERGE INTO t1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1.id, s1.val); +-- Now both s1 and t1 are distributed tables +SELECT undistribute_table('t1'); +SELECT create_distributed_table('t1', 'id'); + +-- We have a potential pitfall where a function can be invoked in +-- the MERGE conditions which can insert/update to a random shard +CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN +LANGUAGE PLPGSQL AS +$$ +BEGIN + INSERT INTO t1 VALUES (100, 
100); + RETURN TRUE; +END; +$$; + +-- Test functions executing in a MERGE statement. This is to prevent the functions from +-- running arbitrary SQL, which might execute on a remote node or modify the target +-- relation and produce unexpected/surprising results. +MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON + t1.id = s1.id AND s1.id = 2 + WHEN matched THEN + UPDATE SET id = s1.id, val = random(); + +-- Test STABLE function +CREATE FUNCTION add_s(integer, integer) RETURNS integer +AS 'select $1 + $2;' +LANGUAGE SQL +STABLE RETURNS NULL ON NULL INPUT; + +MERGE INTO t1 +USING s1 ON t1.id = s1.id +WHEN NOT MATCHED THEN + INSERT VALUES(s1.id, add_s(s1.val, 2)); + +-- Test preventing "ON" join condition from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET val = t1.val + s1.val; +ROLLBACK; + +-- Test preventing WHEN clause(s) from writing to the database +BEGIN; +MERGE INTO t1 +USING s1 ON t1.id = s1.id AND t1.id = 2 +WHEN MATCHED AND (merge_when_and_write()) THEN + UPDATE SET val = t1.val + s1.val; +ROLLBACK; + + -- Joining on partition columns with sub-query MERGE INTO t1 USING (SELECT * FROM s1) sub ON (sub.val = t1.id) -- sub.val is not a distribution column @@ -997,6 +1853,104 @@ WHEN MATCHED THEN WHEN NOT MATCHED THEN INSERT VALUES(mv_source.id, mv_source.val); +-- Distributed tables *must* be colocated +CREATE TABLE dist_target(id int, val varchar); +SELECT create_distributed_table('dist_target', 'id'); +CREATE TABLE dist_source(id int, val varchar); +SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none'); + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Distributed tables *must* be joined on distribution column +CREATE TABLE dist_colocated(id int, val int); +SELECT create_distributed_table('dist_colocated', 'id', colocate_with => 'dist_target'); + +MERGE INTO dist_target +USING dist_colocated +ON dist_target.id = dist_colocated.val -- val is not the distribution column +WHEN MATCHED THEN +UPDATE SET val = dist_colocated.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_colocated.id, dist_colocated.val); + + +-- Both the source and target must be distributed +MERGE INTO dist_target +USING (SELECT 100 id) AS source +ON dist_target.id = source.id AND dist_target.val = 'const' +WHEN MATCHED THEN +UPDATE SET val = 'source' +WHEN NOT MATCHED THEN +INSERT VALUES(source.id, 'source'); + +-- Non-hash distributed tables (append/range). 
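+-- show_tables reports the partition method recorded for dist_target and dist_source;
+-- each block below redistributes the tables as append or range and then retries the same MERGE.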
+CREATE VIEW show_tables AS +SELECT logicalrelid, partmethod +FROM pg_dist_partition +WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass) +ORDER BY 1; + +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_source', 'id', 'append'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_source', 'id', 'range'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Both are append tables +SELECT undistribute_table('dist_target'); +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_target', 'id', 'append'); +SELECT create_distributed_table('dist_source', 'id', 'append'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + +-- Both are range tables +SELECT undistribute_table('dist_target'); +SELECT undistribute_table('dist_source'); +SELECT create_distributed_table('dist_target', 'id', 'range'); +SELECT create_distributed_table('dist_source', 'id', 'range'); +SELECT * FROM show_tables; + +MERGE INTO dist_target +USING dist_source +ON dist_target.id = dist_source.id +WHEN MATCHED THEN +UPDATE SET val = dist_source.val +WHEN NOT MATCHED THEN +INSERT VALUES(dist_source.id, dist_source.val); + DROP SERVER foreign_server CASCADE; +DROP FUNCTION merge_when_and_write(); DROP SCHEMA merge_schema CASCADE; SELECT 1 FROM master_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql/merge_arbitrary.sql b/src/test/regress/sql/merge_arbitrary.sql new file mode 100644 index 000000000..17b7d4f90 --- /dev/null +++ b/src/test/regress/sql/merge_arbitrary.sql @@ -0,0 +1,133 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif + +SET search_path TO merge_arbitrary_schema; +INSERT INTO target_cj VALUES (1, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (2, 'target', 0); +INSERT INTO target_cj VALUES (3, 'target', 0); + +INSERT INTO source_cj1 VALUES (2, 'source-1', 10); +INSERT INTO source_cj2 VALUES (2, 'source-2', 20); + +BEGIN; +MERGE INTO target_cj t +USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +BEGIN; +-- try accessing columns from either side of the source join +MERGE INTO target_cj t +USING source_cj1 s2 + INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10 +ON t.tid = sid1 AND t.tid = 2 +WHEN MATCHED THEN + UPDATE SET src = src1, val = val2 +WHEN NOT MATCHED THEN + DO NOTHING; +SELECT * FROM target_cj ORDER BY 1; +ROLLBACK; + +-- Test PREPARE +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + 
INSERT VALUES(s1, s2); + +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +INSERT INTO prept VALUES(100, 0); + +INSERT INTO preps VALUES(100, 0); +INSERT INTO preps VALUES(200, 0); + +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); + +-- sixth time +EXECUTE insert(1, 1, -1); EXECUTE delete(0); +EXECUTE insert(1, 1, -1); EXECUTE delete(0); + +-- Should have the counter as 14 (7 * 2) +SELECT * FROM prept; + +-- Test local tables +INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause +INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause +INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause +INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause + +INSERT INTO t1 VALUES(1, 0); -- Will be deleted +INSERT INTO t1 VALUES(2, 0); -- Will be updated +INSERT INTO t1 VALUES(5, 0); -- Will be intact + +PREPARE local(int, int) AS +WITH s1_res AS ( + SELECT * FROM s1 +) +MERGE INTO t1 + USING s1_res ON (s1_res.id = t1.id) + + WHEN MATCHED AND s1_res.val = $1 THEN + DELETE + WHEN MATCHED THEN + UPDATE SET val = t1.val + $2 + WHEN NOT MATCHED THEN + INSERT (id, val) VALUES (s1_res.id, s1_res.val); + +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +-- sixth time +BEGIN; +EXECUTE local(0, 1); +ROLLBACK; + +BEGIN; +EXECUTE local(0, 1); +SELECT * FROM t1 order by id; +ROLLBACK; diff --git a/src/test/regress/sql/merge_arbitrary_create.sql b/src/test/regress/sql/merge_arbitrary_create.sql new file mode 100644 index 000000000..edf9b0d9d --- /dev/null +++ b/src/test/regress/sql/merge_arbitrary_create.sql @@ -0,0 +1,50 @@ +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +\q +\endif + +DROP SCHEMA IF EXISTS merge_arbitrary_schema CASCADE; +CREATE SCHEMA merge_arbitrary_schema; +SET search_path TO merge_arbitrary_schema; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 6000000; +CREATE TABLE target_cj(tid int, src text, val int); +CREATE TABLE source_cj1(sid1 int, src1 text, val1 int); +CREATE TABLE source_cj2(sid2 int, src2 text, val2 int); + +SELECT create_distributed_table('target_cj', 'tid'); +SELECT create_distributed_table('source_cj1', 'sid1'); +SELECT create_distributed_table('source_cj2', 'sid2'); + +CREATE TABLE prept(t1 int, t2 int); +CREATE TABLE preps(s1 int, s2 int); + +SELECT create_distributed_table('prept', 't1'), create_distributed_table('preps', 's1'); + +PREPARE insert(int, int, int) AS +MERGE INTO prept +USING (SELECT $2, s1, s2 FROM preps WHERE s2 > $3) as foo +ON prept.t1 = foo.s1 +WHEN MATCHED THEN + UPDATE SET t2 = t2 + $1 +WHEN NOT MATCHED THEN + INSERT VALUES(s1, s2); + +PREPARE delete(int) AS +MERGE INTO prept +USING preps +ON prept.t1 = preps.s1 +WHEN MATCHED AND prept.t2 = $1 THEN + DELETE +WHEN MATCHED THEN + UPDATE SET t2 = t2 + 1; + +-- Citus local tables +CREATE TABLE t1(id int, val int); +CREATE TABLE s1(id int, val int); + +SELECT citus_add_local_table_to_metadata('t1'); +SELECT citus_add_local_table_to_metadata('s1'); diff 
--git a/src/test/regress/sql/metadata_sync_helpers.sql b/src/test/regress/sql/metadata_sync_helpers.sql index af4bc9247..1c5d5b15d 100644 --- a/src/test/regress/sql/metadata_sync_helpers.sql +++ b/src/test/regress/sql/metadata_sync_helpers.sql @@ -429,12 +429,15 @@ BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SET application_name to 'citus_internal gpid=10000000001'; \set VERBOSITY terse - CREATE TABLE publication_test_table(id int); - CREATE PUBLICATION publication_test FOR TABLE publication_test_table; + CREATE OPERATOR === ( + LEFTARG = int, + RIGHTARG = int, + FUNCTION = int4eq + ); SET ROLE metadata_sync_helper_role; WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) - AS (VALUES ('publication', ARRAY['publication_test']::text[], ARRAY[]::text[], -1, 0, false)) + AS (VALUES ('operator', ARRAY['===']::text[], ARRAY['int','int']::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) FROM distributed_object_data; ROLLBACK; @@ -746,15 +749,6 @@ BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT citus_internal_delete_shard_metadata(shardid) FROM shard_data; ROLLBACK; --- the user only allowed to delete shards in a distributed transaction -BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; - SET application_name to 'citus_internal gpid=10000000001'; - \set VERBOSITY terse - WITH shard_data(shardid) - AS (VALUES (1420007)) - SELECT citus_internal_delete_shard_metadata(shardid) FROM shard_data; -ROLLBACK; - -- the user cannot delete non-existing shards BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT assign_distributed_transaction_id(0, 8, '2021-07-09 15:41:55.542377+02'); diff --git a/src/test/regress/sql/multi_cluster_management.sql b/src/test/regress/sql/multi_cluster_management.sql index f9aa81836..d0bb8b16d 100644 --- a/src/test/regress/sql/multi_cluster_management.sql +++ b/src/test/regress/sql/multi_cluster_management.sql @@ -4,9 +4,12 @@ ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART 1; -- Tests functions related to cluster membership --- add the nodes to the cluster +-- add the first node to the cluster in transactional mode SELECT 1 FROM master_add_node('localhost', :worker_1_port); +-- add the second node in nontransactional mode +SET citus.metadata_sync_mode TO 'nontransactional'; SELECT 1 FROM master_add_node('localhost', :worker_2_port); +RESET citus.metadata_sync_mode; -- I am coordinator SELECT citus_is_coordinator(); @@ -506,5 +509,19 @@ BEGIN; COMMIT; SELECT start_metadata_sync_to_all_nodes(); +-- nontransactional sync mode tests +SET citus.metadata_sync_mode TO 'nontransactional'; +-- do not allow nontransactional sync inside transaction block +BEGIN; + SELECT start_metadata_sync_to_all_nodes(); +COMMIT; +SELECT start_metadata_sync_to_all_nodes(); +-- do not allow nontransactional node addition inside transaction block +BEGIN; + SELECT citus_remove_node('localhost', :worker_1_port); + SELECT citus_add_node('localhost', :worker_1_port); +COMMIT; +RESET citus.metadata_sync_mode; + -- verify that at the end of this file, all primary nodes have metadata synced SELECT bool_and(hasmetadata) AND bool_and(metadatasynced) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary'; diff --git a/src/test/regress/sql/multi_data_types.sql b/src/test/regress/sql/multi_data_types.sql index 7601bb319..d307c4c6f 100644 --- a/src/test/regress/sql/multi_data_types.sql +++ b/src/test/regress/sql/multi_data_types.sql 
@@ -6,6 +6,15 @@ SET citus.next_shard_id TO 530000; +-- Given that other test files depend on the existence of types created in this file, +-- we cannot drop them at the end. Instead, we drop them at the beginning of the test +-- to make this file runnable multiple times via run_test.py. +BEGIN; + SET LOCAL client_min_messages TO WARNING; + DROP TYPE IF EXISTS test_composite_type, other_composite_type, bug_status CASCADE; + DROP OPERATOR FAMILY IF EXISTS cats_op_fam USING hash; +COMMIT; + -- create a custom type... CREATE TYPE test_composite_type AS ( i integer, diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 8c8ade9d8..d202227ae 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -556,6 +556,39 @@ ALTER EXTENSION citus UPDATE TO '11.2-1'; SELECT * FROM pg_dist_placement ORDER BY shardid; SELECT * FROM pg_dist_cleanup; +ALTER EXTENSION citus_columnar UPDATE TO '11.2-1'; + +-- Make sure that we defined dependencies from all rel objects (tables, +-- indexes, sequences ..) to columnar table access method ... +SELECT pg_class.oid INTO columnar_schema_members +FROM pg_class, pg_namespace +WHERE pg_namespace.oid=pg_class.relnamespace AND + pg_namespace.nspname='columnar_internal' AND + pg_class.relname NOT IN ('chunk_group_pkey', + 'chunk_pkey', + 'options_pkey', + 'stripe_first_row_number_idx', + 'stripe_pkey'); +SELECT refobjid INTO columnar_schema_members_pg_depend +FROM pg_depend +WHERE classid = 'pg_am'::regclass::oid AND + objid = (select oid from pg_am where amname = 'columnar') AND + objsubid = 0 AND + refclassid = 'pg_class'::regclass::oid AND + refobjsubid = 0 AND + deptype = 'n'; + +-- ... , so this should be empty, +(TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend) +UNION +(TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members); + +-- ... , and both columnar_schema_members_pg_depend & columnar_schema_members +-- should have 5 entries. 
+SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend; + +DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend; + -- error out as cleanup records remain ALTER EXTENSION citus UPDATE TO '11.0-4'; diff --git a/src/test/regress/sql/multi_modifying_xacts.sql b/src/test/regress/sql/multi_modifying_xacts.sql index 2be3a0911..d38f0cc99 100644 --- a/src/test/regress/sql/multi_modifying_xacts.sql +++ b/src/test/regress/sql/multi_modifying_xacts.sql @@ -1,6 +1,9 @@ SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; +CREATE SCHEMA multi_modifying_xacts; +SET search_path TO multi_modifying_xacts; + -- =================================================================== -- test end-to-end modification functionality -- =================================================================== @@ -114,8 +117,20 @@ INSERT INTO researchers VALUES (8, 5, 'Douglas Engelbart'); INSERT INTO labs VALUES (5, 'Los Alamos'); COMMIT; +SET citus.enable_non_colocated_router_query_pushdown TO ON; + SELECT * FROM researchers, labs WHERE labs.id = researchers.lab_id AND researchers.lab_id = 5; +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +-- fails because researchers and labs are not colocated +SELECT * FROM researchers, labs WHERE labs.id = researchers.lab_id AND researchers.lab_id = 5; + +-- works thanks to "OFFSET 0" trick +SELECT * FROM (SELECT * FROM researchers OFFSET 0) researchers, labs WHERE labs.id = researchers.lab_id AND researchers.lab_id = 5; + +RESET citus.enable_non_colocated_router_query_pushdown; + -- and the other way around is also allowed BEGIN; INSERT INTO labs VALUES (6, 'Bell Labs'); @@ -169,7 +184,7 @@ INSERT INTO labs VALUES (6, 'Bell Labs'); ABORT; -- but the DDL should correctly roll back -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.labs'::regclass; +SELECT "Column", "Type", "Modifiers" FROM public.table_desc WHERE relid='multi_modifying_xacts.labs'::regclass; SELECT * FROM labs WHERE id = 6; -- COPY can happen after single row INSERT @@ -294,7 +309,7 @@ CREATE FUNCTION reject_large_id() RETURNS trigger AS $rli$ $rli$ LANGUAGE plpgsql; -- register after insert trigger -SELECT * FROM run_command_on_placements('researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_large_id()') +SELECT * FROM run_command_on_placements('multi_modifying_xacts.researchers', 'CREATE CONSTRAINT TRIGGER reject_large_researcher_id AFTER INSERT ON %s DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE multi_modifying_xacts.reject_large_id()') ORDER BY nodeport, shardid; -- hide postgresql version dependend messages for next test only @@ -418,6 +433,7 @@ AND s.logicalrelid = 'objects'::regclass; -- create trigger on one worker to reject certain values \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ @@ -437,6 +453,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- test partial failure; worker_1 succeeds, 2 fails -- in this case, we expect the transaction to abort @@ -465,6 +482,7 @@ DELETE FROM objects; -- there cannot be errors on different shards at different times -- because the first failure will fail the whole transaction \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync 
TO OFF; CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$ BEGIN @@ -483,6 +501,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -506,6 +525,7 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if the failures happen at COMMIT time? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; @@ -515,6 +535,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- should be the same story as before, just at COMMIT time -- as we use 2PC, the transaction is rollbacked @@ -547,6 +568,7 @@ AND s.logicalrelid = 'objects'::regclass; -- what if all nodes have failures at COMMIT time? \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON labs_1200002; @@ -556,6 +578,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- reduce the log level for differences between PG14 and PG15 -- in PGconn->errorMessage @@ -586,10 +609,12 @@ AND (s.logicalrelid = 'objects'::regclass OR -- what if one shard (objects) succeeds but another (labs) completely fails? \c - - - :worker_2_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad ON objects_1200003; \c - - - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200004; BEGIN; INSERT INTO objects VALUES (1, 'apple'); @@ -682,6 +707,7 @@ SELECT * FROM reference_modifying_xacts; -- lets fail on of the workers at before the commit time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_reference() RETURNS trigger AS $rb$ BEGIN @@ -700,6 +726,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction INSERT INTO reference_modifying_xacts VALUES (999, 3); @@ -711,6 +738,7 @@ COMMIT; -- lets fail one of the workers at COMMIT time \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; CREATE CONSTRAINT TRIGGER reject_bad_reference @@ -719,6 +747,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- try without wrapping inside a transaction @@ -740,10 +769,12 @@ ORDER BY s.logicalrelid, sp.shardstate; -- for the time-being drop the constraint \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_reference ON reference_modifying_xacts_1200006; \c - - - :master_port +SET search_path TO multi_modifying_xacts; -- now create a hash distributed table and run tests -- including both the reference table and the hash @@ -777,6 +808,7 @@ ABORT; -- lets fail one of the workers before COMMIT time for the hash table \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.enable_metadata_sync TO OFF; CREATE FUNCTION reject_bad_hash() RETURNS trigger AS $rb$ BEGIN @@ -795,6 +827,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO 
multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail @@ -809,6 +842,7 @@ SELECT * FROM reference_modifying_xacts WHERE key = 55; -- now lets fail on of the workers for the hash distributed table table -- when there is a reference table involved \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; DROP TRIGGER reject_bad_hash ON hash_modifying_xacts_1200007; -- the trigger is on execution time @@ -818,6 +852,7 @@ DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE PROCEDURE reject_bad_hash(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse -- the transaction as a whole should fail @@ -844,6 +879,7 @@ ORDER BY s.logicalrelid, sp.shardstate; -- change is rollbacked as well \c - - - :worker_1_port +SET search_path TO multi_modifying_xacts; CREATE CONSTRAINT TRIGGER reject_bad_reference AFTER INSERT ON reference_modifying_xacts_1200006 @@ -851,6 +887,7 @@ DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE PROCEDURE reject_bad_reference(); \c - - - :master_port +SET search_path TO multi_modifying_xacts; \set VERBOSITY terse BEGIN; @@ -920,9 +957,11 @@ SELECT count(*) FROM pg_dist_transaction; -- first create the new user on all nodes CREATE USER test_user; +GRANT ALL ON SCHEMA multi_modifying_xacts TO test_user; -- now connect back to the master with the new user \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200015; CREATE TABLE reference_failure_test (key int, value int); SELECT create_reference_table('reference_failure_test'); @@ -934,16 +973,19 @@ SELECT create_distributed_table('numbers_hash_failure_test', 'key'); -- ensure that the shard is created for this user \c - test_user - :worker_1_port +SET search_path TO multi_modifying_xacts; SET citus.override_table_visibility TO false; \dt reference_failure_test_1200015 -- now connect with the default user, -- and rename the existing user \c - :default_user - :worker_1_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; -- connect back to master and query the reference table \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- should fail since the worker doesn't have test_user anymore INSERT INTO reference_failure_test VALUES (1, '1'); @@ -1007,15 +1049,18 @@ SELECT count(*) FROM numbers_hash_failure_test; -- break the other node as well \c - :default_user - :worker_2_port +SET search_path TO multi_modifying_xacts; ALTER USER test_user RENAME TO test_user_new; \c - test_user - :master_port +SET search_path TO multi_modifying_xacts; -- fails on all shard placements INSERT INTO numbers_hash_failure_test VALUES (2,2); -- connect back to the master with the proper user to continue the tests \c - :default_user - :master_port +SET search_path TO multi_modifying_xacts; SET citus.next_shard_id TO 1200020; SET citus.next_placement_id TO 1200033; -- unbreak both nodes by renaming the user back to the original name @@ -1024,6 +1069,7 @@ SELECT * FROM run_command_on_workers('ALTER USER test_user_new RENAME TO test_us DROP TABLE reference_modifying_xacts, hash_modifying_xacts, hash_modifying_xacts_second, reference_failure_test, numbers_hash_failure_test; +REVOKE ALL ON SCHEMA multi_modifying_xacts FROM test_user; DROP USER test_user; -- set up foreign keys to test transactions with co-located and reference tables @@ -1043,7 +1089,10 @@ CREATE TABLE itemgroups ( ); SELECT create_reference_table('itemgroups'); +SET client_min_messages TO WARNING; DROP TABLE 
IF EXISTS users ; +RESET client_min_messages; + CREATE TABLE users ( id int PRIMARY KEY, name text, @@ -1199,5 +1248,5 @@ SELECT insert_abort(); SELECT name FROM labs WHERE id = 1001; RESET citus.function_opens_transaction_block; -DROP FUNCTION insert_abort(); -DROP TABLE items, users, itemgroups, usergroups, researchers, labs; +SET client_min_messages TO WARNING; +DROP SCHEMA multi_modifying_xacts CASCADE; diff --git a/src/test/regress/sql/multi_move_mx.sql b/src/test/regress/sql/multi_move_mx.sql index 166069a6e..9cfa8a3db 100644 --- a/src/test/regress/sql/multi_move_mx.sql +++ b/src/test/regress/sql/multi_move_mx.sql @@ -151,8 +151,34 @@ ORDER BY shardid LIMIT 1 OFFSET 1; +-- Check that shards of a table with GENERATED columns can be moved. +\c - - - :master_port +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; + +CREATE TABLE mx_table_with_generated_column (a int, b int GENERATED ALWAYS AS ( a + 3 ) STORED, c int); +SELECT create_distributed_table('mx_table_with_generated_column', 'a'); + +-- Check that dropped columns are handled properly in a move. +ALTER TABLE mx_table_with_generated_column DROP COLUMN c; + +-- Move a shard from worker 1 to worker 2 +SELECT + citus_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_with_generated_column'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + -- Cleanup \c - - - :master_port +SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_resources(); +DROP TABLE mx_table_with_generated_column; DROP TABLE mx_table_1; DROP TABLE mx_table_2; DROP TABLE mx_table_3; diff --git a/src/test/regress/sql/multi_mx_copy_data.sql b/src/test/regress/sql/multi_mx_copy_data.sql index 26d4d3c42..b4598ae61 100644 --- a/src/test/regress/sql/multi_mx_copy_data.sql +++ b/src/test/regress/sql/multi_mx_copy_data.sql @@ -2,6 +2,11 @@ -- MULTI_MX_COPY_DATA -- +-- We truncate them to make this test runnable multiple times. +-- Note that we cannot do that at the end of the test because +-- we need to keep the data for the other tests. 
+TRUNCATE lineitem_mx, orders_mx; + \set nation_data_file :abs_srcdir '/data/nation.data' \set client_side_copy_command '\\copy nation_hash FROM ' :'nation_data_file' ' with delimiter '''|''';' :client_side_copy_command @@ -96,3 +101,5 @@ SET search_path TO public; :client_side_copy_command \set client_side_copy_command '\\copy supplier_mx FROM ' :'supplier_data_file' ' with delimiter '''|''';' :client_side_copy_command + +DROP TABLE citus_mx_test_schema.nation_hash_replicated; diff --git a/src/test/regress/sql/multi_mx_hide_shard_names.sql b/src/test/regress/sql/multi_mx_hide_shard_names.sql index 281815d4c..9d2536973 100644 --- a/src/test/regress/sql/multi_mx_hide_shard_names.sql +++ b/src/test/regress/sql/multi_mx_hide_shard_names.sql @@ -232,10 +232,15 @@ SELECT set_backend_type(4); SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; -- or, we set it to walsender --- the shards and indexes do show up +-- the shards and indexes do not show up SELECT set_backend_type(9); SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; +-- unless the application name starts with citus_shard +SET application_name = 'citus_shard_move'; +SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; +RESET application_name; + -- but, client backends to see the shards SELECT set_backend_type(3); SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; diff --git a/src/test/regress/sql/multi_mx_modifying_xacts.sql b/src/test/regress/sql/multi_mx_modifying_xacts.sql index cf60f023d..924267c8d 100644 --- a/src/test/regress/sql/multi_mx_modifying_xacts.sql +++ b/src/test/regress/sql/multi_mx_modifying_xacts.sql @@ -116,7 +116,19 @@ INSERT INTO researchers_mx VALUES (8, 5, 'Douglas Engelbart'); INSERT INTO labs_mx VALUES (5, 'Los Alamos'); COMMIT; -SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5;; +SET citus.enable_non_colocated_router_query_pushdown TO ON; + +SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5 ORDER BY 1,2,3,4,5; + +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +-- fails because researchers and labs are not colocated +SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5; + +-- works thanks to "OFFSET 0" trick +SELECT * FROM (SELECT * FROM researchers_mx OFFSET 0) researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5 ORDER BY 1,2,3,4,5; + +RESET citus.enable_non_colocated_router_query_pushdown; -- and the other way around is also allowed BEGIN; @@ -133,8 +145,20 @@ INSERT INTO researchers_mx VALUES (8, 5, 'Douglas Engelbart'); INSERT INTO labs_mx VALUES (5, 'Los Alamos'); COMMIT; +SET citus.enable_non_colocated_router_query_pushdown TO ON; + +SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5 ORDER BY 1,2,3,4,5; + +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +-- fails because researchers and labs are not colocated SELECT * FROM researchers_mx, labs_mx WHERE labs_mx.id = researchers_mx.lab_id and researchers_mx.lab_id = 5; +-- works thanks to "OFFSET 0" trick +SELECT * FROM (SELECT * FROM researchers_mx OFFSET 0) researchers_mx, labs_mx WHERE labs_mx.id = 
researchers_mx.lab_id and researchers_mx.lab_id = 5 ORDER BY 1,2,3,4,5; + +RESET citus.enable_non_colocated_router_query_pushdown; + -- and the other way around is also allowed BEGIN; SET LOCAL citus.enable_local_execution TO off; @@ -331,3 +355,7 @@ COMMIT; -- no data should persists SELECT * FROM objects_mx WHERE id = 1; SELECT * FROM labs_mx WHERE id = 8; + +TRUNCATE objects_mx, labs_mx, researchers_mx; +DROP TRIGGER reject_bad_mx ON labs_mx_1220102; +DROP FUNCTION reject_bad_mx; diff --git a/src/test/regress/sql/multi_mx_router_planner.sql b/src/test/regress/sql/multi_mx_router_planner.sql index fdfd81b07..3593c2ac8 100644 --- a/src/test/regress/sql/multi_mx_router_planner.sql +++ b/src/test/regress/sql/multi_mx_router_planner.sql @@ -275,11 +275,25 @@ SELECT a.author_id as first_author, b.word_count as second_word_count LIMIT 3; -- following join is router plannable since the same worker --- has both shards +-- has both shards when citus.enable_non_colocated_router_query_pushdown +-- is enabled + +SET citus.enable_non_colocated_router_query_pushdown TO ON; + SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles_hash_mx a, articles_single_shard_hash_mx b WHERE a.author_id = 10 and a.author_id = b.author_id - LIMIT 3; + ORDER by 1,2 LIMIT 3; + +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +-- but this is not the case otherwise +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash_mx a, articles_single_shard_hash_mx b + WHERE a.author_id = 10 and a.author_id = b.author_id + ORDER by 1,2 LIMIT 3; + +RESET citus.enable_non_colocated_router_query_pushdown; -- following join is not router plannable since there are no -- workers containing both shards, but will work through recursive @@ -657,3 +671,8 @@ INSERT INTO articles_hash_mx VALUES (51, 1, 'amateus', 1814); SELECT id FROM articles_hash_mx WHERE author_id = 1; + +SET client_min_messages to WARNING; +TRUNCATE articles_hash_mx, company_employees_mx, articles_single_shard_hash_mx; +DROP MATERIALIZED VIEW mv_articles_hash_mx_error; +DROP TABLE authors_hash_mx; diff --git a/src/test/regress/sql/multi_mx_schema_support.sql b/src/test/regress/sql/multi_mx_schema_support.sql index e2eceb0b3..7f1e5d0de 100644 --- a/src/test/regress/sql/multi_mx_schema_support.sql +++ b/src/test/regress/sql/multi_mx_schema_support.sql @@ -38,6 +38,7 @@ END; -- test inserting to table in different schema SET search_path TO public; +DELETE from citus_mx_test_schema.nation_hash where n_nationkey = 100; -- allow rerunning this file INSERT INTO citus_mx_test_schema.nation_hash(n_nationkey, n_name, n_regionkey) VALUES (100, 'TURKEY', 3); -- verify insertion @@ -46,6 +47,7 @@ SELECT * FROM citus_mx_test_schema.nation_hash WHERE n_nationkey = 100; -- test with search_path is set SET search_path TO citus_mx_test_schema; +DELETE from nation_hash where n_nationkey = 101; -- allow rerunning this file INSERT INTO nation_hash(n_nationkey, n_name, n_regionkey) VALUES (101, 'GERMANY', 3); -- verify insertion @@ -307,7 +309,7 @@ CREATE TABLE mx_old_schema.table_set_schema (id int); SELECT create_distributed_table('mx_old_schema.table_set_schema', 'id'); CREATE SCHEMA mx_new_schema; -SELECT objid::oid::regnamespace as "Distributed Schemas" +SELECT objid::oid::regnamespace::text as "Distributed Schemas" FROM pg_catalog.pg_dist_object WHERE objid::oid::regnamespace IN ('mx_old_schema', 'mx_new_schema') ORDER BY "Distributed Schemas"; @@ -324,9 +326,10 @@ ALTER SCHEMA mx_old_schema RENAME TO 
temp_mx_old_schema; ALTER TABLE mx_old_schema.table_set_schema SET SCHEMA mx_new_schema; -SELECT objid::oid::regnamespace as "Distributed Schemas" +SELECT objid::oid::regnamespace::text as "Distributed Schemas" FROM pg_catalog.pg_dist_object - WHERE objid::oid::regnamespace IN ('mx_old_schema', 'mx_new_schema'); + WHERE objid::oid::regnamespace IN ('mx_old_schema', 'mx_new_schema') + ORDER BY "Distributed Schemas"; \c - - - :worker_1_port SELECT table_schema AS "Table's Schema" FROM information_schema.tables WHERE table_name='table_set_schema'; SELECT table_schema AS "Shards' Schema" @@ -392,3 +395,6 @@ SELECT COUNT(*)=0 FROM pg_dist_partition WHERE logicalrelid='new_schema.t1'::reg DROP SCHEMA old_schema, new_schema CASCADE; DROP SCHEMA mx_old_schema CASCADE; DROP SCHEMA mx_new_schema CASCADE; +DROP SCHEMA localschema; +DROP ROLE schema_owner; +DROP ROLE role_to_be_granted; diff --git a/src/test/regress/sql/multi_router_planner.sql b/src/test/regress/sql/multi_router_planner.sql index 87104599c..2ccd43ea3 100644 --- a/src/test/regress/sql/multi_router_planner.sql +++ b/src/test/regress/sql/multi_router_planner.sql @@ -10,6 +10,9 @@ SET citus.next_shard_id TO 840000; -- other tests that triggers fast-path-router planner SET citus.enable_fast_path_router_planner TO false; +CREATE SCHEMA multi_router_planner; +SET search_path TO multi_router_planner; + CREATE TABLE articles_hash ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -381,11 +384,26 @@ SELECT a.author_id as first_author, b.word_count as second_word_count LIMIT 3; -- following join is router plannable since the same worker --- has both shards +-- has both shards when citus.enable_non_colocated_router_query_pushdown +-- is enabled + +SET citus.enable_non_colocated_router_query_pushdown TO ON; + SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles_hash a, articles_single_shard_hash b WHERE a.author_id = 10 and a.author_id = b.author_id - LIMIT 3; + ORDER BY 1,2 LIMIT 3; + +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +-- but this is not the case otherwise + +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_single_shard_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id + ORDER BY 1,2 LIMIT 3; + +RESET citus.enable_non_colocated_router_query_pushdown; -- following join is not router plannable since there are no -- workers containing both shards, but will work through recursive @@ -646,10 +664,26 @@ SELECT * FROM articles_hash WHERE author_id = 1 and 1=0; +-- Even if the where clause contains "false", the query is not router +-- plannable when citus.enable_non_colocated_router_query_pushdown +-- is disabled. This is because the tables are not colocated.
+ +SET citus.enable_non_colocated_router_query_pushdown TO ON; + +-- the same query, router plannable SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles_hash a, articles_single_shard_hash b WHERE a.author_id = 10 and a.author_id = b.author_id and false; +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +-- the same query, _not_ router plannable +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles_hash a, articles_single_shard_hash b + WHERE a.author_id = 10 and a.author_id = b.author_id and false; + +RESET citus.enable_non_colocated_router_query_pushdown; + SELECT * FROM articles_hash WHERE null; @@ -900,9 +934,20 @@ SELECT * FROM articles_range ar join authors_range au on (ar.id = au.id) -- join between hash and range partition tables are router plannable -- only if both tables pruned down to single shard and co-located on the same -- node. --- router plannable + +SET citus.enable_non_colocated_router_query_pushdown TO ON; + +-- router plannable when citus.enable_non_colocated_router_query_pushdown is on SELECT * FROM articles_hash ar join authors_range au on (ar.author_id = au.id) - WHERE ar.author_id = 2; + WHERE ar.author_id = 2 ORDER BY 1,2,3,4,5,6; + +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +-- not router plannable otherwise +SELECT * FROM articles_hash ar join authors_range au on (ar.author_id = au.id) + WHERE ar.author_id = 2 ORDER BY 1,2,3,4,5,6; + +RESET citus.enable_non_colocated_router_query_pushdown; -- not router plannable SELECT * FROM articles_hash ar join authors_range au on (ar.author_id = au.id) @@ -1182,12 +1227,15 @@ SELECT create_distributed_table('failure_test', 'a', 'hash'); SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO off; CREATE USER router_user; -GRANT INSERT ON ALL TABLES IN SCHEMA public TO router_user; +GRANT USAGE ON SCHEMA multi_router_planner TO router_user; +GRANT INSERT ON ALL TABLES IN SCHEMA multi_router_planner TO router_user; \c - router_user - :master_port +SET search_path TO multi_router_planner; -- we will fail to connect to worker 2, since the user does not exist -- still, we never mark placements inactive. 
Instead, fail the transaction BEGIN; @@ -1199,29 +1247,48 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'failure_test'::regclass ) - ORDER BY placementid; + ORDER BY shardid, nodeport; \c - postgres - :worker_1_port DROP OWNED BY router_user; DROP USER router_user; \c - - - :master_port DROP OWNED BY router_user; DROP USER router_user; -DROP TABLE failure_test; -DROP FUNCTION author_articles_max_id(); -DROP FUNCTION author_articles_id_word_count(); +SET search_path TO multi_router_planner; +SET citus.next_shard_id TO 850000; -DROP MATERIALIZED VIEW mv_articles_hash_empty; -DROP MATERIALIZED VIEW mv_articles_hash_data; +SET citus.shard_replication_factor TO 1; +CREATE TABLE single_shard_dist(a int, b int); +SELECT create_distributed_table('single_shard_dist', 'a', shard_count=>1); -DROP VIEW num_db; -DROP FUNCTION number1(); +SET citus.shard_replication_factor TO 2; +CREATE TABLE table_with_four_shards(a int, b int); +SELECT create_distributed_table('table_with_four_shards', 'a', shard_count=>4); -DROP TABLE articles_hash; -DROP TABLE articles_single_shard_hash; -DROP TABLE authors_hash; -DROP TABLE authors_range; -DROP TABLE authors_reference; -DROP TABLE company_employees; -DROP TABLE articles_range; -DROP TABLE articles_append; +SET client_min_messages TO DEBUG2; + +-- Make sure that router rejects planning this query because +-- the target shards are not placed on the same node when +-- citus.enable_non_colocated_router_query_pushdown is disabled. +-- Otherwise, it throws a somewhat meaningless error but we assume +-- that the user is aware of the setting. + +SET citus.enable_non_colocated_router_query_pushdown TO ON; + +WITH cte AS ( + DELETE FROM table_with_four_shards WHERE a = 1 RETURNING * +) +SELECT * FROM single_shard_dist WHERE b IN (SELECT b FROM cte); + +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +WITH cte AS ( + DELETE FROM table_with_four_shards WHERE a = 1 RETURNING * +) +SELECT * FROM single_shard_dist WHERE b IN (SELECT b FROM cte); + +RESET citus.enable_non_colocated_router_query_pushdown; + +SET client_min_messages TO WARNING; +DROP SCHEMA multi_router_planner CASCADE; diff --git a/src/test/regress/sql/multi_simple_queries.sql b/src/test/regress/sql/multi_simple_queries.sql index 8d7e45255..bb1a1f85b 100644 --- a/src/test/regress/sql/multi_simple_queries.sql +++ b/src/test/regress/sql/multi_simple_queries.sql @@ -11,6 +11,9 @@ SET citus.coordinator_aggregation_strategy TO 'disabled'; -- test end-to-end query functionality -- =================================================================== +CREATE SCHEMA simple_queries_test; +SET search_path TO simple_queries_test; + CREATE TABLE articles ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -203,12 +206,12 @@ SELECT author_id FROM articles HAVING author_id <= 2 OR author_id = 8 ORDER BY author_id; -SELECT o_orderstatus, count(*), avg(o_totalprice) FROM orders +SELECT o_orderstatus, count(*), avg(o_totalprice) FROM public.orders GROUP BY o_orderstatus HAVING count(*) > 1450 OR avg(o_totalprice) > 150000 ORDER BY o_orderstatus; -SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM lineitem, orders +SELECT o_orderstatus, sum(l_linenumber), avg(l_linenumber) FROM public.lineitem, public.orders WHERE l_orderkey = o_orderkey AND l_orderkey > 9030 GROUP BY o_orderstatus HAVING sum(l_linenumber) > 1000 @@ -245,12 +248,34 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id 
= 10 and a.author_id = b.author_id LIMIT 3; --- now show that JOINs with multiple tables are not router executable --- they are executed by real-time executor +-- Not router plannable when citus.enable_non_colocated_router_query_pushdown +-- is disabled. + +SET citus.enable_non_colocated_router_query_pushdown TO ON; + SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles a, articles_single_shard b WHERE a.author_id = 10 and a.author_id = b.author_id - LIMIT 3; + ORDER BY 1,2 LIMIT 3; + +SET citus.enable_non_colocated_router_query_pushdown TO OFF; + +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles a, articles_single_shard b + WHERE a.author_id = 10 and a.author_id = b.author_id + ORDER BY 1,2 LIMIT 3; + +-- but they can be executed via repartition join planner +SET citus.enable_repartition_joins TO ON; + +SELECT a.author_id as first_author, b.word_count as second_word_count + FROM articles a, articles_single_shard b + WHERE a.author_id = 10 and a.author_id = b.author_id + ORDER BY 1,2 LIMIT 3; + +RESET citus.enable_repartition_joins; + +RESET citus.enable_non_colocated_router_query_pushdown; -- do not create the master query for LIMIT on a single shard SELECT SELECT * @@ -277,7 +302,7 @@ SELECT avg(word_count) -- error out on unsupported aggregate SET client_min_messages to 'NOTICE'; -CREATE AGGREGATE public.invalid(int) ( +CREATE AGGREGATE invalid(int) ( sfunc = int4pl, stype = int ); @@ -355,7 +380,8 @@ SELECT nextval('query_seq')*2 FROM articles LIMIT 3; SELECT * FROM (SELECT nextval('query_seq') FROM articles LIMIT 3) vals; -- but not elsewhere -SELECT sum(nextval('query_seq')) FROM articles; -SELECT n FROM (SELECT nextval('query_seq') n, random() FROM articles) vals; +SELECT sum(nextval('simple_queries_test.query_seq')) FROM articles; +SELECT n FROM (SELECT nextval('simple_queries_test.query_seq') n, random() FROM articles) vals; -DROP SEQUENCE query_seq; +SET client_min_messages TO WARNING; +DROP SCHEMA simple_queries_test CASCADE; diff --git a/src/test/regress/sql/multi_upsert.sql b/src/test/regress/sql/multi_upsert.sql index 24503b7a4..6ef72d576 100644 --- a/src/test/regress/sql/multi_upsert.sql +++ b/src/test/regress/sql/multi_upsert.sql @@ -3,6 +3,8 @@ SET citus.next_shard_id TO 980000; +CREATE SCHEMA upsert_test; +SET search_path TO upsert_test; CREATE TABLE upsert_test ( @@ -207,3 +209,6 @@ INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_ke -- error out on attempt to update the partition key INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_key) DO UPDATE SET part_key = 15; + +SET client_min_messages TO WARNING; +DROP SCHEMA upsert_test CASCADE; diff --git a/src/test/regress/sql/pg15.sql b/src/test/regress/sql/pg15.sql index 121b41f86..ac8062c65 100644 --- a/src/test/regress/sql/pg15.sql +++ b/src/test/regress/sql/pg15.sql @@ -269,16 +269,21 @@ WITH targq AS ( MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; --- crashes on beta3, fixed on 15 stable ---WITH foo AS ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) SELECT * FROM foo; +WITH foo AS ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) SELECT * FROM foo; ---COPY ( --- MERGE INTO tbl1 USING tbl2 ON (true) --- WHEN MATCHED THEN DELETE ---) TO stdout; +COPY ( + MERGE INTO tbl1 USING tbl2 ON (true) + WHEN MATCHED THEN DELETE +) TO stdout; + +MERGE INTO tbl1 t +USING tbl2 +ON (true) +WHEN MATCHED THEN + DO NOTHING; MERGE INTO tbl1 t USING 
tbl2 diff --git a/src/test/regress/sql/pgmerge.sql b/src/test/regress/sql/pgmerge.sql index 6842f516a..9b828f27e 100644 --- a/src/test/regress/sql/pgmerge.sql +++ b/src/test/regress/sql/pgmerge.sql @@ -608,6 +608,14 @@ USING wq_source s ON t.tid = s.sid WHEN MATCHED AND (merge_when_and_write()) THEN UPDATE SET balance = t.balance + s.balance; ROLLBACK; + +-- Test preventing ON condition from writing to the database +BEGIN; +MERGE INTO wq_target t +USING wq_source s ON t.tid = s.sid AND (merge_when_and_write()) +WHEN MATCHED THEN + UPDATE SET balance = t.balance + s.balance; +ROLLBACK; drop function merge_when_and_write(); DROP TABLE wq_target, wq_source; @@ -1164,12 +1172,14 @@ INSERT INTO pa_target SELECT '2017-02-28', id, id * 100, 'initial' FROM generate SET client_min_messages TO DEBUG1; BEGIN; MERGE INTO pa_target t - USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s + USING (SELECT * FROM pa_source WHERE sid < 10) s + --USING (SELECT '2017-01-15' AS slogts, * FROM pa_source WHERE sid < 10) s ON t.tid = s.sid WHEN MATCHED THEN UPDATE SET balance = balance + delta, val = val || ' updated by merge' WHEN NOT MATCHED THEN - INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); + INSERT VALUES ('2017-01-15', sid, delta, 'inserted by merge'); + --INSERT VALUES (slogts::timestamp, sid, delta, 'inserted by merge'); SELECT * FROM pa_target ORDER BY tid; ROLLBACK; RESET client_min_messages; diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql new file mode 100644 index 000000000..3fd6128b8 --- /dev/null +++ b/src/test/regress/sql/publication.sql @@ -0,0 +1,269 @@ +CREATE SCHEMA publication; +CREATE SCHEMA "publication-1"; +SET search_path TO publication; +SET citus.shard_replication_factor TO 1; + +-- for citus_add_local_table_to_metadata / create_distributed_table_concurrently +SELECT citus_set_coordinator_host('localhost', :master_port); + +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; + +\c - - - :worker_1_port +SET citus.enable_ddl_propagation TO off; + +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; + +\c - - - :worker_2_port +SET citus.enable_ddl_propagation TO off; + +CREATE OR REPLACE FUNCTION activate_node_snapshot() + RETURNS text[] + LANGUAGE C STRICT + AS 'citus'; +COMMENT ON FUNCTION activate_node_snapshot() + IS 'commands to activate node snapshot'; + +-- create some publications with conflicting names on worker node + +-- publication will be different from coordinator +CREATE PUBLICATION "pub-all"; +-- publication will be same as coordinator +CREATE PUBLICATION "pub-all-insertupdateonly" FOR ALL TABLES WITH (publish = 'insert, update');; + +\c - - - :master_port +SET search_path TO publication; +SET citus.shard_replication_factor TO 1; + +-- do not create publications on worker 2 initially +SELECT citus_remove_node('localhost', :worker_2_port); + +-- create a non-distributed publication +SET citus.enable_ddl_propagation TO off; +CREATE PUBLICATION pubnotdistributed WITH (publish = 'delete'); +RESET citus.enable_ddl_propagation; +ALTER PUBLICATION pubnotdistributed SET (publish = 'truncate'); + +-- create regular, distributed publications +CREATE PUBLICATION pubempty; +CREATE PUBLICATION pubinsertonly WITH (publish = 
'insert'); +CREATE PUBLICATION "pub-all" FOR ALL TABLES; +CREATE PUBLICATION "pub-all-insertupdateonly" FOR ALL TABLES WITH (publish = 'insert, update'); + +-- add worker 2 with publications +SELECT 1 FROM citus_add_node('localhost', :worker_2_port); + +-- Check publications on all the nodes, if we see the same publication name twice then its definition differs +-- Note that publications are special in the sense that the coordinator object might differ from +-- worker objects due to the presence of regular tables. +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' ORDER BY 1) s$$) + ORDER BY c) s; + +CREATE TABLE test (x int primary key, y int, "column-1" int, doc xml); +CREATE TABLE "test-pubs" (x int primary key, y int, "column-1" int); +CREATE TABLE "publication-1"."test-pubs" (x int primary key, y int, "column-1" int); + +-- various operations on a publication with only local tables +CREATE PUBLICATION pubtables_orig FOR TABLE test, "test-pubs", "publication-1"."test-pubs" WITH (publish = 'insert, truncate'); +ALTER PUBLICATION pubtables_orig DROP TABLE test; +ALTER PUBLICATION pubtables_orig ADD TABLE test; + +-- publication will be empty on worker nodes, since all tables are local +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + +-- distribute a table, creating a mixed publication +SELECT create_distributed_table('test','x', colocate_with := 'none'); + +-- some generic operations +ALTER PUBLICATION pubtables_orig RENAME TO pubtables; +ALTER PUBLICATION pubtables SET (publish = 'insert, update, delete'); +ALTER PUBLICATION pubtables OWNER TO postgres; +ALTER PUBLICATION pubtables SET (publish = 'inert, update, delete'); +ALTER PUBLICATION pubtables ADD TABLE notexist; + +-- operations with a distributed table +ALTER PUBLICATION pubtables DROP TABLE test; +ALTER PUBLICATION pubtables ADD TABLE test; +ALTER PUBLICATION pubtables SET TABLE test, "test-pubs", "publication-1"."test-pubs"; + +-- operations with a local table in a mixed publication +ALTER PUBLICATION pubtables DROP TABLE "test-pubs"; +ALTER PUBLICATION pubtables ADD TABLE "test-pubs"; + +SELECT create_distributed_table('"test-pubs"', 'x'); + +-- test and test-pubs will show up in worker nodes +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + +-- operations with a strangely named distributed table in a mixed publication +ALTER PUBLICATION pubtables DROP TABLE "test-pubs"; +ALTER PUBLICATION pubtables ADD TABLE "test-pubs"; + +-- create a publication with distributed and local tables +DROP PUBLICATION pubtables; +CREATE PUBLICATION pubtables FOR TABLE test, "test-pubs", "publication-1"."test-pubs"; + +-- change distributed tables +SELECT alter_distributed_table('test', shard_count := 5, cascade_to_colocated := true); +SELECT undistribute_table('test'); +SELECT citus_add_local_table_to_metadata('test'); +SELECT create_distributed_table_concurrently('test', 'x'); +SELECT undistribute_table('"test-pubs"'); +SELECT 
create_reference_table('"test-pubs"'); + +-- publications are unchanged despite various tranformations +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + +-- partitioned table +CREATE TABLE testpub_partitioned (a int, b text, c text) PARTITION BY RANGE (a); +CREATE TABLE testpub_partitioned_0 PARTITION OF testpub_partitioned FOR VALUES FROM (1) TO (10); +ALTER TABLE testpub_partitioned_0 ADD PRIMARY KEY (a); +ALTER TABLE testpub_partitioned_0 REPLICA IDENTITY USING INDEX testpub_partitioned_0_pkey; +CREATE TABLE testpub_partitioned_1 PARTITION OF testpub_partitioned FOR VALUES FROM (11) TO (20); +ALTER TABLE testpub_partitioned_1 ADD PRIMARY KEY (a); +ALTER TABLE testpub_partitioned_1 REPLICA IDENTITY USING INDEX testpub_partitioned_1_pkey; +CREATE PUBLICATION pubpartitioned FOR TABLE testpub_partitioned WITH (publish_via_partition_root = 'true'); + +SELECT create_distributed_table('testpub_partitioned', 'a'); + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubpartitioned%' ORDER BY 1) s$$) + ORDER BY c) s; + +DROP PUBLICATION pubpartitioned; +CREATE PUBLICATION pubpartitioned FOR TABLE testpub_partitioned WITH (publish_via_partition_root = 'true'); + +-- add a partition +ALTER PUBLICATION pubpartitioned ADD TABLE testpub_partitioned_1; + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLIATION%' AND c LIKE '%pubpartitioned%' ORDER BY 1) s$$) + ORDER BY c) s; + +-- make sure we can sync all the publication metadata +SELECT start_metadata_sync_to_all_nodes(); + +DROP PUBLICATION pubempty; +DROP PUBLICATION pubtables; +DROP PUBLICATION pubinsertonly; +DROP PUBLICATION "pub-all-insertupdateonly"; +DROP PUBLICATION "pub-all"; +DROP PUBLICATION pubpartitioned; +DROP PUBLICATION pubnotdistributed; + +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 +\gset +\if :server_version_ge_15 +\else +SET client_min_messages TO ERROR; +DROP SCHEMA publication CASCADE; +DROP SCHEMA "publication-1" CASCADE; + +SELECT citus_remove_node('localhost', :master_port); +\q +\endif + +-- recreate a mixed publication +CREATE PUBLICATION pubtables FOR TABLE test, "publication-1"."test-pubs"; + +-- operations on an existing distributed table +ALTER PUBLICATION pubtables DROP TABLE test; +ALTER PUBLICATION pubtables ADD TABLE test (y); +ALTER PUBLICATION pubtables SET TABLE test WHERE (doc IS DOCUMENT); +ALTER PUBLICATION pubtables SET TABLE test WHERE (xmlexists('//foo[text() = ''bar'']' PASSING BY VALUE doc)); +ALTER PUBLICATION pubtables SET TABLE test WHERE (CASE x WHEN 5 THEN true ELSE false END); + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + +ALTER PUBLICATION pubtables SET TABLE test ("column-1", x) WHERE (x > "column-1"), "publication-1"."test-pubs"; + +-- operations on a local table +ALTER PUBLICATION pubtables 
DROP TABLE "publication-1"."test-pubs"; +ALTER PUBLICATION pubtables ADD TABLE "publication-1"."test-pubs" (y); + +-- mixed operations +ALTER PUBLICATION pubtables SET TABLE test, TABLES IN SCHEMA "publication-1", TABLES IN SCHEMA current_schema; +ALTER PUBLICATION pubtables SET TABLE "publication-1"."test-pubs", test ("column-1", x) WHERE (x > "column-1"); + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubtables%' ORDER BY 1) s$$) + ORDER BY c) s; + +-- publication with schemas +CREATE PUBLICATION "pub-mix" FOR TABLE test, TABLES IN SCHEMA current_schema, TABLE "publication-1"."test-pubs", TABLES IN SCHEMA "publication-1"; + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pub-mix%' ORDER BY 1) s$$) + ORDER BY c) s; + +-- publication on a partitioned table +CREATE PUBLICATION pubpartitioned FOR TABLE testpub_partitioned (a, b) WITH (publish_via_partition_root = 'true'); +ALTER PUBLICATION pubpartitioned SET (publish_via_partition_root = 1); + +SELECT alter_distributed_table('testpub_partitioned', shard_count := 6, cascade_to_colocated := true); + +SELECT DISTINCT c FROM ( + SELECT unnest(result::text[]) c + FROM run_command_on_workers($$ + SELECT array_agg(c) FROM (SELECT c FROM unnest(activate_node_snapshot()) c WHERE c LIKE '%CREATE PUBLICATION%' AND c LIKE '%pubpartitioned%' ORDER BY 1) s$$) + ORDER BY c) s; + +-- make sure we propagate schema dependencies +SET citus.create_object_propagation TO 'deferred'; +BEGIN; +CREATE SCHEMA deptest; +END; +CREATE PUBLICATION pubdep FOR TABLES IN SCHEMA deptest; +RESET citus.create_object_propagation; +DROP SCHEMA deptest CASCADE; + +-- make sure we can sync all the publication metadata +SELECT start_metadata_sync_to_all_nodes(); + +DROP PUBLICATION pubdep; +DROP PUBLICATION "pub-mix"; +DROP PUBLICATION pubtables; +DROP PUBLICATION pubpartitioned; + +SET client_min_messages TO ERROR; +DROP SCHEMA publication CASCADE; +DROP SCHEMA "publication-1" CASCADE; + +SELECT citus_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql index 02a6df666..da4259f5b 100644 --- a/src/test/regress/sql/shard_rebalancer.sql +++ b/src/test/regress/sql/shard_rebalancer.sql @@ -1462,6 +1462,56 @@ DROP VIEW table_placements_per_node; DELETE FROM pg_catalog.pg_dist_rebalance_strategy WHERE name='capacity_high_worker_2'; DELETE FROM pg_catalog.pg_dist_rebalance_strategy WHERE name='only_worker_1'; +-- add colocation groups with shard group count < worker count +-- the rebalancer should balance those "unbalanced shards" evenly as much as possible +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); +create table single_shard_colocation_1a (a int primary key); +create table single_shard_colocation_1b (a int primary key); +create table single_shard_colocation_1c (a int primary key); +SET citus.shard_replication_factor = 1; +select create_distributed_table('single_shard_colocation_1a','a', colocate_with => 'none', shard_count => 1); +select create_distributed_table('single_shard_colocation_1b','a',colocate_with=>'single_shard_colocation_1a'); +select 
create_distributed_table('single_shard_colocation_1c','a',colocate_with=>'single_shard_colocation_1b'); + +create table single_shard_colocation_2a (a bigint); +create table single_shard_colocation_2b (a bigint); +select create_distributed_table('single_shard_colocation_2a','a', colocate_with => 'none', shard_count => 1); +select create_distributed_table('single_shard_colocation_2b','a',colocate_with=>'single_shard_colocation_2a'); + +-- all shards are placed on the first worker node +SELECT sh.logicalrelid, pl.nodeport + FROM pg_dist_shard sh JOIN pg_dist_shard_placement pl ON sh.shardid = pl.shardid + WHERE sh.logicalrelid::text IN ('single_shard_colocation_1a', 'single_shard_colocation_1b', 'single_shard_colocation_1c', 'single_shard_colocation_2a', 'single_shard_colocation_2b') + ORDER BY sh.logicalrelid; + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); +select rebalance_table_shards(); + +-- verify some shards are moved to the new node +SELECT sh.logicalrelid, pl.nodeport + FROM pg_dist_shard sh JOIN pg_dist_shard_placement pl ON sh.shardid = pl.shardid + WHERE sh.logicalrelid::text IN ('single_shard_colocation_1a', 'single_shard_colocation_1b', 'single_shard_colocation_1c', 'single_shard_colocation_2a', 'single_shard_colocation_2b') + ORDER BY sh.logicalrelid; + +DROP TABLE single_shard_colocation_1a, single_shard_colocation_1b, single_shard_colocation_1c, single_shard_colocation_2a, single_shard_colocation_2b CASCADE; + +-- verify we detect if one of the tables do not have a replica identity or primary key +-- and error out in case of shard transfer mode = auto +SELECT 1 FROM citus_remove_node('localhost', :worker_2_port); + +create table table_with_primary_key (a int primary key); +select create_distributed_table('table_with_primary_key','a'); +create table table_without_primary_key (a bigint); +select create_distributed_table('table_without_primary_key','a'); + +-- add the second node back, then rebalance +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 16; +select 1 from citus_add_node('localhost', :worker_2_port); +select rebalance_table_shards(); + +DROP TABLE table_with_primary_key, table_without_primary_key; \c - - - :worker_1_port SET citus.enable_ddl_propagation TO OFF; REVOKE ALL ON SCHEMA public FROM testrole; diff --git a/src/test/regress/sql/shard_rebalancer_unit.sql b/src/test/regress/sql/shard_rebalancer_unit.sql index 51293a227..607be4710 100644 --- a/src/test/regress/sql/shard_rebalancer_unit.sql +++ b/src/test/regress/sql/shard_rebalancer_unit.sql @@ -530,3 +530,57 @@ SELECT unnest(shard_placement_rebalance_array( ]::json[], improvement_threshold := 0.6 )); + + +-- Test single shard colocation groups +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":10, "nodename":"a", "next_colocation": true}', + '{"shardid":3, "cost":10, "nodename":"a", "next_colocation": true}', + '{"shardid":4, "cost":100, "nodename":"a", "next_colocation": true}', + '{"shardid":5, "cost":50, "nodename":"a", "next_colocation": true}', + '{"shardid":6, "cost":50, "nodename":"a", "next_colocation": true}' + ]::json[], + improvement_threshold := 0.1 +)); + + +-- Test colocation groups with shard count < worker count +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}']::json[], + 
ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":10, "nodename":"a"}', + '{"shardid":3, "cost":10, "nodename":"a", "next_colocation": true}', + '{"shardid":4, "cost":100, "nodename":"a"}', + '{"shardid":5, "cost":50, "nodename":"a", "next_colocation": true}', + '{"shardid":6, "cost":50, "nodename":"a"}' + ]::json[], + improvement_threshold := 0.1 +)); + + +-- Test colocation groups with shard count < worker count +-- mixed with a colocation group shard_count > worker count +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":10, "nodename":"a"}', + '{"shardid":3, "cost":10, "nodename":"a", "next_colocation": true}', + '{"shardid":4, "cost":100, "nodename":"a"}', + '{"shardid":5, "cost":50, "nodename":"a", "next_colocation": true}', + '{"shardid":6, "cost":50, "nodename":"a"}', + '{"shardid":7, "cost":50, "nodename":"b", "next_colocation": true}', + '{"shardid":8, "cost":50, "nodename":"b"}', + '{"shardid":9, "cost":50, "nodename":"b"}', + '{"shardid":10, "cost":50, "nodename":"b"}', + '{"shardid":11, "cost":50, "nodename":"b"}', + '{"shardid":12, "cost":50, "nodename":"b"}' + ]::json[], + improvement_threshold := 0.1 +)); diff --git a/src/test/regress/sql/single_node.sql b/src/test/regress/sql/single_node.sql index 7bbbda895..3419025af 100644 --- a/src/test/regress/sql/single_node.sql +++ b/src/test/regress/sql/single_node.sql @@ -975,6 +975,7 @@ SELECT pg_sleep(0.1); -- backend(s) that execute on the shards will be terminated -- so show that there no internal backends SET search_path TO single_node; +SET citus.next_shard_id TO 90730500; SELECT count(*) from should_commit; SELECT count(*) FROM pg_stat_activity WHERE application_name LIKE 'citus_internal%'; SELECT get_all_active_client_backend_count(); @@ -998,6 +999,7 @@ ROLLBACK; \c - - - :master_port SET search_path TO single_node; +SET citus.next_shard_id TO 90830500; -- simulate that even if there is no connection slots -- to connect, Citus can switch to local execution @@ -1069,14 +1071,14 @@ SELECT create_distributed_table('test_disabling_drop_and_truncate', 'a'); SET citus.enable_manual_changes_to_shards TO off; -- these should error out -DROP TABLE test_disabling_drop_and_truncate_102040; -TRUNCATE TABLE test_disabling_drop_and_truncate_102040; +DROP TABLE test_disabling_drop_and_truncate_90830500; +TRUNCATE TABLE test_disabling_drop_and_truncate_90830500; RESET citus.enable_manual_changes_to_shards ; -- these should work as expected -TRUNCATE TABLE test_disabling_drop_and_truncate_102040; -DROP TABLE test_disabling_drop_and_truncate_102040; +TRUNCATE TABLE test_disabling_drop_and_truncate_90830500; +DROP TABLE test_disabling_drop_and_truncate_90830500; DROP TABLE test_disabling_drop_and_truncate; @@ -1086,10 +1088,10 @@ SELECT create_distributed_table('test_creating_distributed_relation_table_from_s -- these should error because shards cannot be used to: -- create distributed table -SELECT create_distributed_table('test_creating_distributed_relation_table_from_shard_102044', 'a'); +SELECT create_distributed_table('test_creating_distributed_relation_table_from_shard_90830504', 'a'); -- create reference table -SELECT create_reference_table('test_creating_distributed_relation_table_from_shard_102044'); +SELECT create_reference_table('test_creating_distributed_relation_table_from_shard_90830504'); RESET citus.shard_replication_factor; DROP TABLE 
test_creating_distributed_relation_table_from_shard; diff --git a/src/test/regress/sql/single_node_truncate.sql b/src/test/regress/sql/single_node_truncate.sql index 5b555ff91..faaae7858 100644 --- a/src/test/regress/sql/single_node_truncate.sql +++ b/src/test/regress/sql/single_node_truncate.sql @@ -1,6 +1,7 @@ CREATE SCHEMA single_node_truncate; SET search_path TO single_node_truncate; SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 91630500; -- helper view that prints out local table names and sizes in the schema CREATE VIEW table_sizes AS diff --git a/src/test/regress/sql/split_shard.sql b/src/test/regress/sql/split_shard.sql index f7c105076..1e601fb4f 100644 --- a/src/test/regress/sql/split_shard.sql +++ b/src/test/regress/sql/split_shard.sql @@ -64,7 +64,9 @@ CREATE TABLE table_to_split_2(id bigserial PRIMARY KEY, value char); CREATE TABLE table_to_split_3(id bigserial PRIMARY KEY, value char); -- Create publication at worker1 +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION pub1 FOR TABLE table_to_split_1, table_to_split_2, table_to_split_3; +RESET citus.enable_ddl_propagation; SELECT count(*) FROM pg_catalog.worker_split_shard_replication_setup(ARRAY[ ROW(1, 'id', 2, '-2147483648', '-1', :worker_2_node)::pg_catalog.split_shard_info, @@ -176,7 +178,9 @@ SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \ SET search_path TO split_shard_replication_setup_schema; -- Create publication at worker1 +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION pub1 FOR TABLE table_to_split_1, table_to_split_2, table_to_split_3; +RESET citus.enable_ddl_propagation; SELECT count(*) FROM pg_catalog.worker_split_shard_replication_setup(ARRAY[ ROW(1, 'id', 2, '-2147483648', '-1', :worker_1_node)::pg_catalog.split_shard_info, @@ -282,7 +286,9 @@ SET search_path TO split_shard_replication_setup_schema; SET client_min_messages TO ERROR; -- Create publication at worker1 +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION pub1 for table table_to_split_1, table_to_split_2, table_to_split_3; +RESET citus.enable_ddl_propagation; -- Worker1 is target for table_to_split_2 and table_to_split_3 SELECT count(*) FROM pg_catalog.worker_split_shard_replication_setup(ARRAY[ @@ -401,8 +407,10 @@ CREATE TABLE table_second_9(id bigserial PRIMARY KEY, value char); -- Create publication at worker1 \c - postgres - :worker_1_port SET search_path TO split_shard_replication_setup_schema; +SET citus.enable_ddl_propagation TO off; CREATE PUBLICATION pub1 FOR TABLE table_first_4, table_first_5, table_first_6; CREATE PUBLICATION pub2 FOR TABLE table_second_7, table_second_8, table_second_9; +RESET citus.enable_ddl_propagation; SELECT count(*) FROM pg_catalog.worker_split_shard_replication_setup(ARRAY[ ROW(4, 'id', 5, '-2147483648', '-1', :worker_2_node)::pg_catalog.split_shard_info, diff --git a/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql b/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql index bc2c40b0c..a326fb0a4 100644 --- a/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql +++ b/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql @@ -17,3 +17,8 @@ FROM pg_dist_node_metadata, pg_extension WHERE extname = 'citus'; -- still, do not NOTICE the version as it changes per release SET client_min_messages TO WARNING; CALL citus_finish_citus_upgrade(); + +-- we should be able to sync metadata in nontransactional way as well +SET citus.metadata_sync_mode TO 'nontransactional'; +SELECT 
diff --git a/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql b/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql
index bc2c40b0c..a326fb0a4 100644
--- a/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql
+++ b/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql
@@ -17,3 +17,8 @@ FROM pg_dist_node_metadata, pg_extension WHERE extname = 'citus';
 -- still, do not NOTICE the version as it changes per release
 SET client_min_messages TO WARNING;
 CALL citus_finish_citus_upgrade();
+
+-- we should be able to sync metadata in a nontransactional way as well
+SET citus.metadata_sync_mode TO 'nontransactional';
+SELECT start_metadata_sync_to_all_nodes();
+RESET citus.metadata_sync_mode;
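The new upgrade step exercises metadata sync in the nontransactional mode; a hedged sketch of the contrast, assuming the GUC's default remains the single-transaction mode:

-- Default: sync all metadata to the workers in one transaction.
SELECT start_metadata_sync_to_all_nodes();

-- Nontransactional: sync over multiple transactions, which avoids building
-- one huge transaction when the metadata is large (setting as in the patch).
SET citus.metadata_sync_mode TO 'nontransactional';
SELECT start_metadata_sync_to_all_nodes();
RESET citus.metadata_sync_mode;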
diff --git a/src/test/regress/sql/upgrade_columnar_after.sql b/src/test/regress/sql/upgrade_columnar_after.sql
index f2839645c..133fcfde0 100644
--- a/src/test/regress/sql/upgrade_columnar_after.sql
+++ b/src/test/regress/sql/upgrade_columnar_after.sql
@@ -101,10 +101,12 @@ BEGIN;
     INSERT INTO text_data (value) SELECT generate_random_string(1024 * 10) FROM generate_series(0,10);
     SELECT count(DISTINCT value) FROM text_data;
 
-    -- make sure that serial is preserved
-    -- since we run "after schedule" twice and "rollback" wouldn't undo
-    -- sequence changes, it can be 22 or 33, not a different value
-    SELECT max(id) in (22, 33) FROM text_data;
+    -- Make sure that serial is preserved.
+    --
+    -- Since we might run "after schedule" several times for flaky test
+    -- detection and "rollback" wouldn't undo sequence changes, "id" should
+    -- look like below:
+    SELECT max(id) >= 11 AND max(id) % 11 = 0 FROM text_data;
 
     -- since we run "after schedule" twice, rollback the transaction
     -- to avoid getting "table already exists" errors
@@ -137,7 +139,12 @@ ROLLBACK;
 SELECT pg_class.oid INTO columnar_schema_members
 FROM pg_class, pg_namespace
 WHERE pg_namespace.oid=pg_class.relnamespace AND
-      pg_namespace.nspname='columnar_internal';
+      pg_namespace.nspname='columnar_internal' AND
+      pg_class.relname NOT IN ('chunk_group_pkey',
+                               'chunk_pkey',
+                               'options_pkey',
+                               'stripe_first_row_number_idx',
+                               'stripe_pkey');
 SELECT refobjid INTO columnar_schema_members_pg_depend
 FROM pg_depend
 WHERE classid = 'pg_am'::regclass::oid AND
@@ -153,19 +160,24 @@ UNION
 (TABLE columnar_schema_members_pg_depend EXCEPT TABLE columnar_schema_members);
 
 -- ... , and both columnar_schema_members_pg_depend & columnar_schema_members
--- should have 10 entries.
-SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend;
+-- should have 5 entries.
+SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend;
 
 DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend;
 
 -- Check the same for workers too.
-SELECT run_command_on_workers(
+SELECT success, result FROM run_command_on_workers(
 $$
 SELECT pg_class.oid INTO columnar_schema_members
 FROM pg_class, pg_namespace
 WHERE pg_namespace.oid=pg_class.relnamespace AND
-      pg_namespace.nspname='columnar_internal';
+      pg_namespace.nspname='columnar_internal' AND
+      pg_class.relname NOT IN ('chunk_group_pkey',
+                               'chunk_pkey',
+                               'options_pkey',
+                               'stripe_first_row_number_idx',
+                               'stripe_pkey');
 SELECT refobjid INTO columnar_schema_members_pg_depend
 FROM pg_depend
 WHERE classid = 'pg_am'::regclass::oid AND
@@ -177,7 +189,7 @@ WHERE classid = 'pg_am'::regclass::oid AND
 $$
 );
 
-SELECT run_command_on_workers(
+SELECT success, result FROM run_command_on_workers(
 $$
 (TABLE columnar_schema_members EXCEPT TABLE columnar_schema_members_pg_depend)
 UNION
@@ -185,13 +197,13 @@ UNION
 $$
 );
 
-SELECT run_command_on_workers(
+SELECT success, result FROM run_command_on_workers(
 $$
-SELECT COUNT(*)=10 FROM columnar_schema_members_pg_depend;
+SELECT COUNT(*)=5 FROM columnar_schema_members_pg_depend;
 $$
 );
 
-SELECT run_command_on_workers(
+SELECT success, result FROM run_command_on_workers(
 $$
 DROP TABLE columnar_schema_members, columnar_schema_members_pg_depend;
 $$
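The relaxed serial check above works because each insert batch comes from generate_series(0,10), i.e. 11 rows, so max(id) should always be a positive multiple of 11 no matter how many times the schedule is rerun; a small worked sketch of the predicate:

-- The new predicate accepts any positive multiple of 11:
SELECT 22 >= 11 AND 22 % 11 = 0;  -- true (two runs)
SELECT 33 >= 11 AND 33 % 11 = 0;  -- true (three runs)
SELECT 30 >= 11 AND 30 % 11 = 0;  -- false, not a multiple of 11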
diff --git a/src/test/regress/sql/upgrade_columnar_before.sql b/src/test/regress/sql/upgrade_columnar_before.sql
index ea71dba02..6f39f4234 100644
--- a/src/test/regress/sql/upgrade_columnar_before.sql
+++ b/src/test/regress/sql/upgrade_columnar_before.sql
@@ -1,5 +1,29 @@
 -- Test if relying on topological sort of the objects, not their names, works
 -- fine when re-creating objects during pg_upgrade.
+
+DO
+$$
+BEGIN
+IF EXISTS (SELECT * FROM pg_namespace WHERE nspname = 'upgrade_columnar')
+THEN
+    -- Drop the table left over from the earlier run of
+    -- upgrade_columnar_before.sql. Similarly, drop the fake public schema
+    -- created before and rename the original one (renamed to citus_schema)
+    -- back to public.
+    --
+    -- This can only happen if upgrade_columnar_before.sql is run multiple
+    -- times for flaky test detection.
+    DROP TABLE citus_schema.new_columnar_table;
+    DROP SCHEMA public CASCADE;
+    ALTER SCHEMA citus_schema RENAME TO public;
+
+    SET LOCAL client_min_messages TO WARNING;
+    DROP SCHEMA upgrade_columnar CASCADE;
+END IF;
+END
+$$
+LANGUAGE plpgsql;
+
 ALTER SCHEMA public RENAME TO citus_schema;
 SET search_path TO citus_schema;
diff --git a/src/test/regress/sql/upgrade_post_11_after.sql b/src/test/regress/sql/upgrade_post_11_after.sql
index e38491593..946c52ae2 100644
--- a/src/test/regress/sql/upgrade_post_11_after.sql
+++ b/src/test/regress/sql/upgrade_post_11_after.sql
@@ -4,7 +4,7 @@ SET search_path = post_11_upgrade;
 SELECT pg_identify_object_as_address(classid, objid, objsubid) FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.employees'::regclass, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.my_type_for_view'::regtype, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_table_for_view'::regclass, 'post_11_upgrade.non_dist_upgrade_test_view'::regclass, 'post_11_upgrade.non_dist_upgrade_test_view_local_join'::regclass, 'post_11_upgrade.non_dist_upgrade_multiple_dist_view'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass, 'post_11_upgrade.v_test_1'::regclass, 'post_11_upgrade.v_test_2'::regclass, 'post_11_upgrade.owned_by_extension_table'::regclass, 'post_11_upgrade.materialized_view'::regclass, 'post_11_upgrade.owned_by_extension_view'::regclass, 'post_11_upgrade.local_type'::regtype, 'post_11_upgrade.non_dist_dist_table_for_view'::regclass, 'post_11_upgrade.depends_on_nothing_1'::regclass, 'post_11_upgrade.depends_on_nothing_2'::regclass, 'post_11_upgrade.depends_on_pg'::regclass, 'post_11_upgrade.depends_on_citus'::regclass, 'post_11_upgrade.depends_on_seq'::regclass, 'post_11_upgrade.depends_on_seq_and_no_support'::regclass) ORDER BY 1;
 
 -- on all nodes
-SELECT run_command_on_workers($$SELECT array_agg(pg_identify_object_as_address(classid, objid, objsubid)) FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass) ORDER BY 1;$$) ORDER BY 1;
+SELECT run_command_on_workers($$SELECT array_agg(worker_object) FROM (SELECT pg_identify_object_as_address(classid, objid, objsubid) worker_object FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass) ORDER BY 1) worker_objects;$$) ORDER BY 1;
 
 -- Create the necessary test utility function
 CREATE OR REPLACE FUNCTION activate_node_snapshot()
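The rewritten worker query above moves the ORDER BY into a subquery before aggregating, because the order in which rows enter array_agg is otherwise unspecified and an outer ORDER BY only sorts the already-aggregated result. A minimal sketch of the pattern, with an illustrative my_objects table that is not from the patch:

-- Nondeterministic: ORDER BY 1 here sorts the single result row, not the
-- values inside the array.
SELECT array_agg(obj_name) FROM my_objects ORDER BY 1;

-- Deterministic: sort in a subquery, then aggregate the ordered rows.
SELECT array_agg(obj_name)
FROM (SELECT obj_name FROM my_objects ORDER BY obj_name) ordered_objects;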
diff --git a/src/test/regress/sql/worker_split_copy_test.sql b/src/test/regress/sql/worker_split_copy_test.sql
index 2fac91c69..e2f4f9a23 100644
--- a/src/test/regress/sql/worker_split_copy_test.sql
+++ b/src/test/regress/sql/worker_split_copy_test.sql
@@ -110,8 +110,66 @@ SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_810700
 SELECT COUNT(*) FROM worker_split_copy_test."test !/ \n _""dist_123_table_81070016";
 -- END: List updated row count for local targets shard.
 
+-- Check that GENERATED columns are handled properly in a shard split operation.
+\c - - - :master_port
+SET search_path TO worker_split_copy_test;
+SET citus.shard_count TO 2;
+SET citus.shard_replication_factor TO 1;
+SET citus.next_shard_id TO 81080000;
+
+-- BEGIN: Create distributed table and insert data.
+CREATE TABLE worker_split_copy_test.dist_table_with_generated_col(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char, col_todrop int);
+SELECT create_distributed_table('dist_table_with_generated_col', 'id');
+
+-- Check that dropped columns are filtered out in COPY command.
+ALTER TABLE dist_table_with_generated_col DROP COLUMN col_todrop;
+
+INSERT INTO dist_table_with_generated_col (id, value) (SELECT g.id, 'N' FROM generate_series(1, 1000) AS g(id));
+
+-- END: Create distributed table and insert data.
+
+-- BEGIN: Create target shards in Worker1 and Worker2 for a 2-way split copy.
+\c - - - :worker_1_port
+CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080015(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char);
+\c - - - :worker_2_port
+CREATE TABLE worker_split_copy_test.dist_table_with_generated_col_81080016(id int primary key, new_id int GENERATED ALWAYS AS ( id + 3 ) stored, value char);
+
+-- BEGIN: List row count for source shard and target shard in Worker1.
+\c - - - :worker_1_port
+SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080000;
+SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015;
+
+-- BEGIN: List row count for target shard in Worker2.
+\c - - - :worker_2_port
+SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016;
+
+\c - - - :worker_1_port
+SELECT * from worker_split_copy(
+    81080000, -- source shard id to copy
+    'id',
+    ARRAY[
+        -- split copy info for split child 1
+        ROW(81080015, -- destination shard id
+            -2147483648, -- split range begin
+            -1073741824, -- split range end
+            :worker_1_node)::pg_catalog.split_copy_info,
+        -- split copy info for split child 2
+        ROW(81080016, -- destination shard id
+            -1073741823, -- split range begin
+            -1, -- split range end
+            :worker_2_node)::pg_catalog.split_copy_info
+        ]
+    );
+
+\c - - - :worker_1_port
+SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080015;
+
+\c - - - :worker_2_port
+SELECT COUNT(*) FROM worker_split_copy_test.dist_table_with_generated_col_81080016;
+
 -- BEGIN: CLEANUP.
 \c - - - :master_port
 SET client_min_messages TO WARNING;
+CALL citus_cleanup_orphaned_resources();
 DROP SCHEMA worker_split_copy_test CASCADE;
 -- END: CLEANUP.
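The new worker_split_copy coverage above hinges on the fact that GENERATED ... STORED columns cannot be written directly, so the copy into the split shards has to list only the real columns and let the target recompute the generated ones (dropped columns are likewise skipped). A hedged sketch of that PostgreSQL behavior, using a demo_gen table that is not part of the patch:

CREATE TABLE demo_gen(
    id int PRIMARY KEY,
    new_id int GENERATED ALWAYS AS (id + 3) STORED,
    value char);

-- INSERT INTO demo_gen (id, new_id, value) VALUES (1, 4, 'N');  -- would fail:
-- generated columns cannot be assigned to.

-- Listing only the non-generated columns works; new_id is computed as id + 3.
INSERT INTO demo_gen (id, value) VALUES (1, 'N');
SELECT id, new_id, value FROM demo_gen;  -- expected: 1, 4, N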
diff --git a/src/test/regress/sql_schedule b/src/test/regress/sql_schedule
index f07f7af9a..9538f1482 100644
--- a/src/test/regress/sql_schedule
+++ b/src/test/regress/sql_schedule
@@ -14,3 +14,5 @@ test: arbitrary_configs_truncate
 test: arbitrary_configs_truncate_cascade
 test: arbitrary_configs_truncate_partition
 test: arbitrary_configs_alter_table_add_constraint_without_name
+test: merge_arbitrary
+test: arbitrary_configs_router