mirror of https://github.com/citusdata/citus.git
Merge branch 'main' into sqlancer-test-gha
commit 201d976a3b
|
@ -6,7 +6,7 @@ orbs:
|
|||
parameters:
|
||||
image_suffix:
|
||||
type: string
|
||||
default: '-vc4b1573'
|
||||
default: '-v087ecd7'
|
||||
pg13_version:
|
||||
type: string
|
||||
default: '13.10'
|
||||
|
@ -201,6 +201,9 @@ jobs:
|
|||
- run:
|
||||
name: 'Check if all GUCs are sorted alphabetically'
|
||||
command: ci/check_gucs_are_alphabetically_sorted.sh
|
||||
- run:
|
||||
name: 'Check for missing downgrade scripts'
|
||||
command: ci/check_migration_files.sh
|
||||
|
||||
check-sql-snapshots:
|
||||
docker:
|
||||
|
@ -266,6 +269,41 @@ jobs:
|
|||
- coverage:
|
||||
flags: 'test_<< parameters.old_pg_major >>_<< parameters.new_pg_major >>,upgrade'
|
||||
|
||||
test-pytest:
|
||||
description: Runs pytest based tests
|
||||
parameters:
|
||||
pg_major:
|
||||
description: 'postgres major version'
|
||||
type: integer
|
||||
image:
|
||||
description: 'docker image to use for the tests'
|
||||
type: string
|
||||
default: citus/failtester
|
||||
image_tag:
|
||||
description: 'docker image tag to use'
|
||||
type: string
|
||||
docker:
|
||||
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
|
||||
working_directory: /home/circleci/project
|
||||
steps:
|
||||
- checkout
|
||||
- attach_workspace:
|
||||
at: .
|
||||
- install_extension:
|
||||
pg_major: << parameters.pg_major >>
|
||||
- configure
|
||||
- enable_core
|
||||
- run:
|
||||
name: 'Run pytest'
|
||||
command: |
|
||||
gosu circleci \
|
||||
make -C src/test/regress check-pytest
|
||||
no_output_timeout: 2m
|
||||
- stack_trace
|
||||
- coverage:
|
||||
flags: 'test_<< parameters.pg_major >>,pytest'
|
||||
|
||||
|
||||
test-arbitrary-configs:
|
||||
description: Runs tests on arbitrary configs
|
||||
parallelism: 6
|
||||
|
@ -452,6 +490,10 @@ jobs:
|
|||
pg_major: << parameters.pg_major >>
|
||||
- configure
|
||||
- enable_core
|
||||
- run:
|
||||
name: 'Install DBI.pm'
|
||||
command: |
|
||||
apt-get update && apt-get install libdbi-perl && apt-get install libdbd-pg-perl
|
||||
- run:
|
||||
name: 'Run Test'
|
||||
command: |
|
||||
|
@ -551,7 +593,7 @@ jobs:
|
|||
testForDebugging="<< parameters.test >>"
|
||||
|
||||
if [ -z "$testForDebugging" ]; then
|
||||
detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*.sql\|src/test/regress/spec/.*.spec' || true))
|
||||
detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*\.sql\|src/test/regress/spec/.*\.spec\|src/test/regress/citus_tests/test/test_.*\.py' || true))
|
||||
tests=${detected_changes}
|
||||
else
|
||||
tests=$testForDebugging;
|
||||
|
@ -854,38 +896,30 @@ workflows:
|
|||
image: citus/failtester
|
||||
make: check-failure
|
||||
|
||||
- tap-test-citus: &tap-test-citus-13
|
||||
name: 'test-13_tap-recovery'
|
||||
suite: recovery
|
||||
- test-pytest:
|
||||
name: 'test-13_pytest'
|
||||
pg_major: 13
|
||||
image_tag: '<< pipeline.parameters.pg13_version >>'
|
||||
requires: [build-13]
|
||||
- tap-test-citus:
|
||||
<<: *tap-test-citus-13
|
||||
name: 'test-13_tap-columnar-freezing'
|
||||
suite: columnar_freezing
|
||||
|
||||
- tap-test-citus: &tap-test-citus-14
|
||||
name: 'test-14_tap-recovery'
|
||||
suite: recovery
|
||||
- test-pytest:
|
||||
name: 'test-14_pytest'
|
||||
pg_major: 14
|
||||
image_tag: '<< pipeline.parameters.pg14_version >>'
|
||||
requires: [build-14]
|
||||
- tap-test-citus:
|
||||
<<: *tap-test-citus-14
|
||||
name: 'test-14_tap-columnar-freezing'
|
||||
suite: columnar_freezing
|
||||
|
||||
- tap-test-citus: &tap-test-citus-15
|
||||
name: 'test-15_tap-recovery'
|
||||
suite: recovery
|
||||
- test-pytest:
|
||||
name: 'test-15_pytest'
|
||||
pg_major: 15
|
||||
image_tag: '<< pipeline.parameters.pg15_version >>'
|
||||
requires: [build-15]
|
||||
|
||||
- tap-test-citus:
|
||||
<<: *tap-test-citus-15
|
||||
name: 'test-15_tap-columnar-freezing'
|
||||
suite: columnar_freezing
|
||||
name: 'test-15_tap-cdc'
|
||||
suite: cdc
|
||||
pg_major: 15
|
||||
image_tag: '<< pipeline.parameters.pg15_version >>'
|
||||
requires: [build-15]
|
||||
|
||||
- test-arbitrary-configs:
|
||||
name: 'test-13_check-arbitrary-configs'
|
||||
|
@ -936,8 +970,6 @@ workflows:
|
|||
- test-13_check-follower-cluster
|
||||
- test-13_check-columnar
|
||||
- test-13_check-columnar-isolation
|
||||
- test-13_tap-recovery
|
||||
- test-13_tap-columnar-freezing
|
||||
- test-13_check-failure
|
||||
- test-13_check-enterprise
|
||||
- test-13_check-enterprise-isolation
|
||||
|
@ -956,8 +988,6 @@ workflows:
|
|||
- test-14_check-follower-cluster
|
||||
- test-14_check-columnar
|
||||
- test-14_check-columnar-isolation
|
||||
- test-14_tap-recovery
|
||||
- test-14_tap-columnar-freezing
|
||||
- test-14_check-failure
|
||||
- test-14_check-enterprise
|
||||
- test-14_check-enterprise-isolation
|
||||
|
@ -976,8 +1006,6 @@ workflows:
|
|||
- test-15_check-follower-cluster
|
||||
- test-15_check-columnar
|
||||
- test-15_check-columnar-isolation
|
||||
- test-15_tap-recovery
|
||||
- test-15_tap-columnar-freezing
|
||||
- test-15_check-failure
|
||||
- test-15_check-enterprise
|
||||
- test-15_check-enterprise-isolation
|
||||
|
|
|
@ -17,7 +17,7 @@ trim_trailing_whitespace = true
|
|||
insert_final_newline = unset
|
||||
trim_trailing_whitespace = unset
|
||||
|
||||
[*.{sql,sh,py}]
|
||||
[*.{sql,sh,py,toml}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
tab_width = 4
|
||||
|
|
.flake8
|
@ -1,7 +1,6 @@
|
|||
[flake8]
|
||||
# E203 is ignored for black
|
||||
# E402 is ignored because of the way we do relative imports
|
||||
extend-ignore = E203, E402
|
||||
extend-ignore = E203
|
||||
# black will truncate to 88 characters usually, but long string literals it
|
||||
# might keep. That's fine in most cases unless it gets really excessive.
|
||||
max-line-length = 150
|
||||
|
|
|
@ -157,7 +157,6 @@ jobs:
|
|||
|
||||
apt-get update -y
|
||||
## Install required packages to execute packaging tools for deb based distros
|
||||
apt install python3-dev python3-pip -y
|
||||
sudo apt-get purge -y python3-yaml
|
||||
python3 -m pip install --upgrade pip setuptools==57.5.0
|
||||
apt-get install python3-dev python3-pip -y
|
||||
apt-get purge -y python3-yaml
|
||||
./.github/packaging/validate_build_output.sh "deb"
|
||||
|
|
|
@ -283,6 +283,14 @@ actually run in CI. This is most commonly forgotten for newly added CI tests
|
|||
that the developer only ran locally. It also checks that all CI scripts have a
|
||||
section in this `README.md` file and that they include `ci/ci_helpers.sh`.
|
||||
|
||||
## `check_migration_files.sh`

A branch that touches a set of upgrade scripts is expected to touch the
corresponding downgrade scripts as well. If this script fails, read the output
and make sure you update the downgrade scripts in the printed list. If you
really don't need a downgrade to run any SQL, you can write a comment in the
file explaining why a downgrade step is not necessary.
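
As a quick sanity check you can run the same script locally from the repository
root; the version numbers below are only a hypothetical illustration of the
pairing the script expects:

    # compares your branch against origin/main, exactly like CI does
    ci/check_migration_files.sh

    # e.g. a change to
    #   src/backend/distributed/sql/citus--11.2-1--11.2-2.sql
    # is expected to come with
    #   src/backend/distributed/sql/downgrades/citus--11.2-2--11.2-1.sql
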
## `disallow_c_comments_in_migrations.sh`
|
||||
|
||||
We do not use C-style comments in migration files as the stripped
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
#! /bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
# This file checks for the existence of downgrade scripts for every upgrade script that is changed in the branch.
|
||||
|
||||
# create list of migration files for upgrades
|
||||
upgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/citus--.*sql" || exit 0 ; })
|
||||
downgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/downgrades/citus--.*sql" || exit 0 ; })
|
||||
ret_value=0
|
||||
|
||||
for file in $upgrade_files
|
||||
do
|
||||
# There should always be 2 matches, and no need to avoid splitting here
|
||||
# shellcheck disable=SC2207
|
||||
versions=($(grep --only-matching --extended-regexp "[0-9]+\.[0-9]+[-.][0-9]+" <<< "$file"))
|
||||
|
||||
from_version=${versions[0]};
|
||||
to_version=${versions[1]};
|
||||
|
||||
downgrade_migration_file="src/backend/distributed/sql/downgrades/citus--$to_version--$from_version.sql"
|
||||
|
||||
# check for the existence of migration scripts
|
||||
if [[ $(grep --line-regexp --count "$downgrade_migration_file" <<< "$downgrade_files") == 0 ]]
|
||||
then
|
||||
echo "$file is updated, but $downgrade_migration_file is not updated in branch"
|
||||
ret_value=1
|
||||
fi
|
||||
done
|
||||
|
||||
exit $ret_value;
|
|
@ -3,3 +3,35 @@ profile = 'black'
|
|||
|
||||
[tool.black]
|
||||
include = '(src/test/regress/bin/diff-filter|\.pyi?|\.ipynb)$'
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = [
|
||||
"--import-mode=importlib",
|
||||
"--showlocals",
|
||||
"--tb=short",
|
||||
]
|
||||
pythonpath = 'src/test/regress/citus_tests'
|
||||
asyncio_mode = 'auto'
|
||||
|
||||
# Make test discovery quicker from the root dir of the repo
|
||||
testpaths = ['src/test/regress/citus_tests/test']
|
||||
|
||||
# Make test discovery quicker from other directories than root directory
|
||||
norecursedirs = [
|
||||
'*.egg',
|
||||
'.*',
|
||||
'build',
|
||||
'venv',
|
||||
'ci',
|
||||
'vendor',
|
||||
'backend',
|
||||
'bin',
|
||||
'include',
|
||||
'tmp_*',
|
||||
'results',
|
||||
'expected',
|
||||
'sql',
|
||||
'spec',
|
||||
'data',
|
||||
'__pycache__',
|
||||
]
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
# The directory used to store columnar sql files after pre-processing them
|
||||
# with 'cpp' in build-time, see src/backend/columnar/Makefile.
|
||||
/build/
|
|
@ -10,14 +10,51 @@ OBJS += \
|
|||
MODULE_big = citus_columnar
|
||||
EXTENSION = citus_columnar
|
||||
|
||||
columnar_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql))
|
||||
columnar_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql))
|
||||
DATA = $(columnar_sql_files) \
|
||||
$(columnar_downgrade_sql_files)
|
||||
template_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql))
|
||||
template_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/sql/downgrades/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql))
|
||||
generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_files))
|
||||
generated_downgrade_sql_files += $(patsubst %,$(citus_abs_srcdir)/build/sql/%,$(template_downgrade_sql_files))
|
||||
|
||||
DATA_built = $(generated_sql_files)
|
||||
|
||||
PG_CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include
|
||||
|
||||
include $(citus_top_builddir)/Makefile.global
|
||||
|
||||
.PHONY: install-all
|
||||
SQL_DEPDIR=.deps/sql
|
||||
SQL_BUILDDIR=build/sql
|
||||
|
||||
$(generated_sql_files): $(citus_abs_srcdir)/build/%: %
|
||||
@mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR)
|
||||
@# -MF is used to store dependency files(.Po) in another directory for separation
|
||||
@# -MT is used to change the target of the rule emitted by dependency generation.
|
||||
@# -P is used to inhibit generation of linemarkers in the output from the preprocessor.
|
||||
@# -undef is used to not predefine any system-specific or GCC-specific macros.
|
||||
@# `man cpp` for further information
|
||||
cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
|
||||
|
||||
$(generated_downgrade_sql_files): $(citus_abs_srcdir)/build/sql/%: sql/downgrades/%
|
||||
@mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR)
|
||||
@# -MF is used to store dependency files(.Po) in another directory for separation
|
||||
@# -MT is used to change the target of the rule emitted by dependency generation.
|
||||
@# -P is used to inhibit generation of linemarkers in the output from the preprocessor.
|
||||
@# -undef is used to not predefine any system-specific or GCC-specific macros.
|
||||
@# `man cpp` for further information
|
||||
cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
|
||||
|
||||
.PHONY: install install-downgrades install-all
|
||||
|
||||
cleanup-before-install:
|
||||
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar.control
|
||||
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/columnar--*
|
||||
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar--*
|
||||
|
||||
install: cleanup-before-install
|
||||
|
||||
# install and install-downgrades should be run sequentially
|
||||
install-all: install
|
||||
$(MAKE) install-downgrades
|
||||
|
||||
install-downgrades: $(generated_downgrade_sql_files)
|
||||
$(INSTALL_DATA) $(generated_downgrade_sql_files) '$(DESTDIR)$(datadir)/$(datamoduledir)/'
|
||||
|
||||
|
|
|
@ -1 +1,19 @@
|
|||
-- citus_columnar--11.1-1--11.2-1
|
||||
|
||||
#include "udfs/columnar_ensure_am_depends_catalog/11.2-1.sql"
|
||||
|
||||
DELETE FROM pg_depend
|
||||
WHERE classid = 'pg_am'::regclass::oid
|
||||
AND objid IN (select oid from pg_am where amname = 'columnar')
|
||||
AND objsubid = 0
|
||||
AND refclassid = 'pg_class'::regclass::oid
|
||||
AND refobjid IN (
|
||||
'columnar_internal.stripe_first_row_number_idx'::regclass::oid,
|
||||
'columnar_internal.chunk_group_pkey'::regclass::oid,
|
||||
'columnar_internal.chunk_pkey'::regclass::oid,
|
||||
'columnar_internal.options_pkey'::regclass::oid,
|
||||
'columnar_internal.stripe_first_row_number_idx'::regclass::oid,
|
||||
'columnar_internal.stripe_pkey'::regclass::oid
|
||||
)
|
||||
AND refobjsubid = 0
|
||||
AND deptype = 'n';
|
||||
|
|
|
@ -1 +1,4 @@
|
|||
-- citus_columnar--11.2-1--11.1-1
|
||||
|
||||
-- Note that we intentionally do not re-insert the pg_depend records that we
|
||||
-- deleted via citus_columnar--11.1-1--11.2-1.sql.
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
SET search_path = pg_catalog
|
||||
AS $func$
|
||||
BEGIN
|
||||
INSERT INTO pg_depend
|
||||
WITH columnar_schema_members(relid) AS (
|
||||
SELECT pg_class.oid AS relid FROM pg_class
|
||||
WHERE relnamespace =
|
||||
COALESCE(
|
||||
(SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar_internal'),
|
||||
(SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar')
|
||||
)
|
||||
AND relname IN ('chunk',
|
||||
'chunk_group',
|
||||
'options',
|
||||
'storageid_seq',
|
||||
'stripe')
|
||||
)
|
||||
SELECT -- Define a dependency edge from "columnar table access method" ..
|
||||
'pg_am'::regclass::oid as classid,
|
||||
(select oid from pg_am where amname = 'columnar') as objid,
|
||||
0 as objsubid,
|
||||
-- ... to some objects registered as regclass and that lives in
|
||||
-- "columnar" schema. That contains catalog tables and the sequences
|
||||
-- created in "columnar" schema.
|
||||
--
|
||||
-- Given the possibility of user might have created their own objects
|
||||
-- in columnar schema, we explicitly specify list of objects that we
|
||||
-- are interested in.
|
||||
'pg_class'::regclass::oid as refclassid,
|
||||
columnar_schema_members.relid as refobjid,
|
||||
0 as refobjsubid,
|
||||
'n' as deptype
|
||||
FROM columnar_schema_members
|
||||
-- Avoid inserting duplicate entries into pg_depend.
|
||||
EXCEPT TABLE pg_depend;
|
||||
END;
|
||||
$func$;
|
||||
COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
|
||||
IS 'internal function responsible for creating dependencies from columnar '
|
||||
'table access method to the rel objects in columnar schema';
|
|
@ -1,4 +1,4 @@
|
|||
CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
|
||||
CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
SET search_path = pg_catalog
|
||||
|
@ -14,22 +14,17 @@ BEGIN
|
|||
)
|
||||
AND relname IN ('chunk',
|
||||
'chunk_group',
|
||||
'chunk_group_pkey',
|
||||
'chunk_pkey',
|
||||
'options',
|
||||
'options_pkey',
|
||||
'storageid_seq',
|
||||
'stripe',
|
||||
'stripe_first_row_number_idx',
|
||||
'stripe_pkey')
|
||||
'stripe')
|
||||
)
|
||||
SELECT -- Define a dependency edge from "columnar table access method" ..
|
||||
'pg_am'::regclass::oid as classid,
|
||||
(select oid from pg_am where amname = 'columnar') as objid,
|
||||
0 as objsubid,
|
||||
-- ... to each object that is registered to pg_class and that lives
|
||||
-- in "columnar" schema. That contains catalog tables, indexes
|
||||
-- created on them and the sequences created in "columnar" schema.
|
||||
-- ... to some objects registered as regclass and that lives in
|
||||
-- "columnar" schema. That contains catalog tables and the sequences
|
||||
-- created in "columnar" schema.
|
||||
--
|
||||
-- Given the possibility of user might have created their own objects
|
||||
-- in columnar schema, we explicitly specify list of objects that we
|
||||
|
@ -43,6 +38,6 @@ BEGIN
|
|||
EXCEPT TABLE pg_depend;
|
||||
END;
|
||||
$func$;
|
||||
COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
|
||||
COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
|
||||
IS 'internal function responsible for creating dependencies from columnar '
|
||||
'table access method to the rel objects in columnar schema';
|
||||
|
|
|
@ -32,7 +32,13 @@ OBJS += \
|
|||
$(patsubst $(citus_abs_srcdir)/%.c,%.o,$(foreach dir,$(SUBDIRS), $(sort $(wildcard $(citus_abs_srcdir)/$(dir)/*.c))))
|
||||
|
||||
# be explicit about the default target
|
||||
all:
|
||||
.PHONY: cdc
|
||||
|
||||
all: cdc
|
||||
|
||||
cdc:
|
||||
echo "running cdc make"
|
||||
$(MAKE) DECODER=pgoutput -C cdc all
|
||||
|
||||
NO_PGXS = 1
|
||||
|
||||
|
@ -81,11 +87,19 @@ endif
|
|||
|
||||
.PHONY: clean-full install install-downgrades install-all
|
||||
|
||||
clean: clean-cdc
|
||||
|
||||
clean-cdc:
|
||||
$(MAKE) DECODER=pgoutput -C cdc clean
|
||||
|
||||
cleanup-before-install:
|
||||
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus.control
|
||||
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus--*
|
||||
|
||||
install: cleanup-before-install
|
||||
install: cleanup-before-install install-cdc
|
||||
|
||||
install-cdc:
|
||||
$(MAKE) DECODER=pgoutput -C cdc install
|
||||
|
||||
# install and install-downgrades should be run sequentially
|
||||
install-all: install
|
||||
|
@ -96,4 +110,5 @@ install-downgrades: $(generated_downgrade_sql_files)
|
|||
|
||||
clean-full:
|
||||
$(MAKE) clean
|
||||
$(MAKE) -C cdc clean-full
|
||||
rm -rf $(safestringlib_builddir)
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
ifndef DECODER
|
||||
DECODER = pgoutput
|
||||
endif
|
||||
|
||||
MODULE_big = citus_$(DECODER)
|
||||
citus_subdir = src/backend/distributed/cdc
|
||||
citus_top_builddir = ../../../..
|
||||
citus_decoders_dir = $(DESTDIR)$(pkglibdir)/citus_decoders
|
||||
|
||||
OBJS += cdc_decoder.o cdc_decoder_utils.o
|
||||
|
||||
include $(citus_top_builddir)/Makefile.global
|
||||
|
||||
override CFLAGS += -DDECODER=\"$(DECODER)\" -I$(citus_abs_top_srcdir)/include
|
||||
override CPPFLAGS += -DDECODER=\"$(DECODER)\" -I$(citus_abs_top_srcdir)/include
|
||||
|
||||
install: install-cdc
|
||||
|
||||
clean: clean-cdc
|
||||
|
||||
install-cdc:
|
||||
mkdir -p '$(citus_decoders_dir)'
|
||||
$(INSTALL_SHLIB) citus_$(DECODER).so '$(citus_decoders_dir)/$(DECODER).so'
|
||||
|
||||
clean-cdc:
|
||||
rm -f '$(citus_decoders_dir)/$(DECODER).so'
|
|
@ -0,0 +1,500 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* cdc_decoder.c
|
||||
* CDC Decoder plugin for Citus
|
||||
*
|
||||
* Copyright (c) Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "cdc_decoder_utils.h"
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
|
||||
#include "access/genam.h"
|
||||
#include "catalog/pg_namespace.h"
|
||||
#include "catalog/pg_publication.h"
|
||||
#include "commands/extension.h"
|
||||
#include "common/hashfn.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/typcache.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
|
||||
static LogicalDecodeChangeCB ouputPluginChangeCB;
|
||||
|
||||
static void InitShardToDistributedTableMap(void);
|
||||
|
||||
static void PublishDistributedTableChanges(LogicalDecodingContext *ctx,
|
||||
ReorderBufferTXN *txn,
|
||||
Relation relation,
|
||||
ReorderBufferChange *change);
|
||||
|
||||
|
||||
static bool replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId
|
||||
origin_id);
|
||||
|
||||
static void TranslateChangesIfSchemaChanged(Relation relation, Relation targetRelation,
|
||||
ReorderBufferChange *change);
|
||||
|
||||
static void TranslateAndPublishRelationForCDC(LogicalDecodingContext *ctx,
|
||||
ReorderBufferTXN *txn,
|
||||
Relation relation,
|
||||
ReorderBufferChange *change, Oid shardId,
|
||||
Oid targetRelationid);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint64 shardId;
|
||||
Oid distributedTableId;
|
||||
bool isReferenceTable;
|
||||
bool isNull;
|
||||
} ShardIdHashEntry;
|
||||
|
||||
static HTAB *shardToDistributedTableMap = NULL;
|
||||
|
||||
static void cdc_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
||||
Relation relation, ReorderBufferChange *change);
|
||||
|
||||
|
||||
/* build time macro for base decoder plugin name for CDC and Shard Split. */
|
||||
#ifndef DECODER
|
||||
#define DECODER "pgoutput"
|
||||
#endif
|
||||
|
||||
#define DECODER_INIT_FUNCTION_NAME "_PG_output_plugin_init"
|
||||
|
||||
#define CITUS_SHARD_TRANSFER_SLOT_PREFIX "citus_shard_"
|
||||
#define CITUS_SHARD_TRANSFER_SLOT_PREFIX_SIZE (sizeof(CITUS_SHARD_TRANSFER_SLOT_PREFIX) - \
|
||||
1)
|
||||
|
||||
/*
|
||||
* Postgres uses 'pgoutput' as default plugin for logical replication.
|
||||
* We want to reuse Postgres pgoutput's functionality as much as possible.
|
||||
* Hence we load all the functions of this plugin and override as required.
|
||||
*/
|
||||
void
|
||||
_PG_output_plugin_init(OutputPluginCallbacks *cb)
|
||||
{
|
||||
elog(LOG, "Initializing CDC decoder");
|
||||
|
||||
/*
|
||||
* We build custom .so files whose name matches common decoders (pgoutput, wal2json)
|
||||
* and place them in $libdir/citus_decoders/ such that administrators can configure
|
||||
* dynamic_library_path to include this directory, and users can then use the
|
||||
* regular decoder names when creating replications slots.
|
||||
*
|
||||
* To load the original decoder, we need to remove citus_decoders/ from the
|
||||
* dynamic_library_path.
|
||||
*/
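/*
* Illustrative sketch (an assumed setup, not something this function does):
* with dynamic_library_path = '$libdir/citus_decoders:$libdir' in
* postgresql.conf, a client can keep using the stock decoder name, e.g.
* SELECT pg_create_logical_replication_slot('cdc_slot', 'pgoutput');
* and transparently gets this CDC-aware wrapper loaded instead.
*/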
|
||||
char *originalDLP = Dynamic_library_path;
|
||||
Dynamic_library_path = RemoveCitusDecodersFromPaths(Dynamic_library_path);
|
||||
|
||||
LogicalOutputPluginInit plugin_init =
|
||||
(LogicalOutputPluginInit) (void *)
|
||||
load_external_function(DECODER,
|
||||
DECODER_INIT_FUNCTION_NAME,
|
||||
false, NULL);
|
||||
|
||||
if (plugin_init == NULL)
|
||||
{
|
||||
elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
|
||||
}
|
||||
|
||||
/* in case this session is used for different replication slots */
|
||||
Dynamic_library_path = originalDLP;
|
||||
|
||||
/* ask the output plugin to fill the callback struct */
|
||||
plugin_init(cb);
|
||||
|
||||
/* Initialize the Shard Id to Distributed Table id mapping hash table.*/
|
||||
InitShardToDistributedTableMap();
|
||||
|
||||
/* actual pgoutput callback function will be called */
|
||||
ouputPluginChangeCB = cb->change_cb;
|
||||
cb->change_cb = cdc_change_cb;
|
||||
cb->filter_by_origin_cb = replication_origin_filter_cb;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Check if the replication slot is for Shard transfer by checking for prefix.
|
||||
*/
|
||||
inline static
|
||||
bool
|
||||
IsShardTransferSlot(char *replicationSlotName)
|
||||
{
|
||||
return strncmp(replicationSlotName, CITUS_SHARD_TRANSFER_SLOT_PREFIX,
|
||||
CITUS_SHARD_TRANSFER_SLOT_PREFIX_SIZE) == 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* cdc_change_cb is the callback for incoming tuple changes. It translates changes
* on shard tables into changes on the corresponding distributed tables before
* handing them off to the base output plugin.
|
||||
*/
|
||||
static void
|
||||
cdc_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
||||
Relation relation, ReorderBufferChange *change)
|
||||
{
|
||||
/*
|
||||
* If Citus has not been loaded yet, pass the changes
|
||||
* through to the underlying decoder plugin.
|
||||
*/
|
||||
if (!CdcCitusHasBeenLoaded())
|
||||
{
|
||||
ouputPluginChangeCB(ctx, txn, relation, change);
|
||||
return;
|
||||
}
|
||||
|
||||
/* check if the relation is publishable.*/
|
||||
if (!is_publishable_relation(relation))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
char *replicationSlotName = ctx->slot->data.name.data;
|
||||
if (replicationSlotName == NULL)
|
||||
{
|
||||
elog(ERROR, "Replication slot name is NULL!");
|
||||
return;
|
||||
}
|
||||
|
||||
/* If the slot is for internal shard operations, call the base plugin's call back. */
|
||||
if (IsShardTransferSlot(replicationSlotName))
|
||||
{
|
||||
ouputPluginChangeCB(ctx, txn, relation, change);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Translate the changes from the shard to the distributed table and publish. */
|
||||
PublishDistributedTableChanges(ctx, txn, relation, change);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* InitShardToDistributedTableMap initializes the hash table that is used to
|
||||
* translate the changes in the shard table to the changes in the distributed table.
|
||||
*/
|
||||
static void
|
||||
InitShardToDistributedTableMap()
|
||||
{
|
||||
HASHCTL info;
|
||||
memset(&info, 0, sizeof(info));
|
||||
info.keysize = sizeof(uint64);
|
||||
info.entrysize = sizeof(ShardIdHashEntry);
|
||||
info.hash = tag_hash;
|
||||
info.hcxt = CurrentMemoryContext;
|
||||
|
||||
int hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION);
|
||||
shardToDistributedTableMap = hash_create("CDC Decoder translation hash table", 1024,
|
||||
&info, hashFlags);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AddShardIdToHashTable adds the shardId to the hash table.
|
||||
*/
|
||||
static Oid
|
||||
AddShardIdToHashTable(uint64 shardId, ShardIdHashEntry *entry)
|
||||
{
|
||||
entry->shardId = shardId;
|
||||
entry->distributedTableId = CdcLookupShardRelationFromCatalog(shardId, true);
|
||||
entry->isReferenceTable = CdcPartitionMethodViaCatalog(entry->distributedTableId) ==
|
||||
'n';
|
||||
return entry->distributedTableId;
|
||||
}
|
||||
|
||||
|
||||
static Oid
|
||||
LookupDistributedTableIdForShardId(uint64 shardId, bool *isReferenceTable)
|
||||
{
|
||||
bool found;
|
||||
Oid distributedTableId = InvalidOid;
|
||||
ShardIdHashEntry *entry = (ShardIdHashEntry *) hash_search(shardToDistributedTableMap,
|
||||
&shardId,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
if (found)
|
||||
{
|
||||
distributedTableId = entry->distributedTableId;
|
||||
}
|
||||
else
|
||||
{
|
||||
distributedTableId = AddShardIdToHashTable(shardId, entry);
|
||||
}
|
||||
*isReferenceTable = entry->isReferenceTable;
|
||||
return distributedTableId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* replication_origin_filter_cb is a callback that filters out publication of changes
* that originated on any node other than the current node. This is
|
||||
* identified by the "origin_id" of the changes. The origin_id is set to
|
||||
* a non-zero value in the origin node as part of WAL replication for internal
|
||||
* operations like shard split/moves/create_distributed_table etc.
|
||||
*/
|
||||
static bool
|
||||
replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId origin_id)
|
||||
{
|
||||
return (origin_id != InvalidRepOriginId);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TranslateAndPublishRelationForCDC translates the changes in the shard table to
* the changes in the shell table and publishes them as changes to the
* distributed table, so that CDC clients are not aware of the shard tables. It also
|
||||
* handles schema changes to the distributed table.
|
||||
*/
|
||||
static void
|
||||
TranslateAndPublishRelationForCDC(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
||||
Relation relation, ReorderBufferChange *change, Oid
|
||||
shardId, Oid targetRelationid)
|
||||
{
|
||||
/* Get the distributed table's relation for this shard.*/
|
||||
Relation targetRelation = RelationIdGetRelation(targetRelationid);
|
||||
|
||||
/*
|
||||
* Check if there has been a schema change (such as a dropped column), by comparing
|
||||
* the number of attributes in the shard table and the shell table.
|
||||
*/
|
||||
TranslateChangesIfSchemaChanged(relation, targetRelation, change);
|
||||
|
||||
/*
|
||||
* Publish the change to the shard table as the change in the distributed table,
|
||||
* so that the CDC client can see the change in the distributed table,
|
||||
* instead of the shard table, by calling the pgoutput's callback function.
|
||||
*/
|
||||
ouputPluginChangeCB(ctx, txn, targetRelation, change);
|
||||
RelationClose(targetRelation);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PublishDistributedTableChanges publishes changes to a shard table as changes
* to the corresponding distributed table, so that clients see the distributed
* table instead of the shard. It also skips the Citus metadata tables.
|
||||
*/
|
||||
static void
|
||||
PublishDistributedTableChanges(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
||||
Relation relation, ReorderBufferChange *change)
|
||||
{
|
||||
char *shardRelationName = RelationGetRelationName(relation);
|
||||
|
||||
/* Skip publishing CDC changes for any system relations in pg_catalog*/
|
||||
if (relation->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check if the relation is a distributed table by checking for shard name. */
|
||||
uint64 shardId = CdcExtractShardIdFromTableName(shardRelationName, true);
|
||||
|
||||
/* If this relation is not distributed, call the pgoutput's callback and return. */
|
||||
if (shardId == INVALID_SHARD_ID)
|
||||
{
|
||||
ouputPluginChangeCB(ctx, txn, relation, change);
|
||||
return;
|
||||
}
|
||||
|
||||
bool isReferenceTable = false;
|
||||
Oid distRelationId = LookupDistributedTableIdForShardId(shardId, &isReferenceTable);
|
||||
if (distRelationId == InvalidOid)
|
||||
{
|
||||
ouputPluginChangeCB(ctx, txn, relation, change);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Publish changes for reference table only from the coordinator node. */
|
||||
if (isReferenceTable && !CdcIsCoordinator())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/* translate and publish from shard relation to distributed table relation for CDC. */
|
||||
TranslateAndPublishRelationForCDC(ctx, txn, relation, change, shardId,
|
||||
distRelationId);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetTupleForTargetSchemaForCdc returns a heap tuple with the data from sourceRelationTuple
|
||||
* to match the schema in targetRelDesc. Either or both source and target relations may have
|
||||
* dropped columns. This function handles it by adding NULL values for dropped columns in
|
||||
* target relation and skipping dropped columns in source relation. It returns a heap tuple
|
||||
* adjusted to the current schema of the target relation.
|
||||
*/
|
||||
static HeapTuple
|
||||
GetTupleForTargetSchemaForCdc(HeapTuple sourceRelationTuple,
|
||||
TupleDesc sourceRelDesc,
|
||||
TupleDesc targetRelDesc)
|
||||
{
|
||||
/* Allocate memory for sourceValues and sourceNulls arrays. */
|
||||
Datum *sourceValues = (Datum *) palloc0(sourceRelDesc->natts * sizeof(Datum));
|
||||
bool *sourceNulls = (bool *) palloc0(sourceRelDesc->natts * sizeof(bool));
|
||||
|
||||
/* Deform the source tuple to sourceValues and sourceNulls arrays. */
|
||||
heap_deform_tuple(sourceRelationTuple, sourceRelDesc, sourceValues,
|
||||
sourceNulls);
|
||||
|
||||
/* This is the next field to read in the source relation. */
|
||||
uint32 sourceIndex = 0;
|
||||
uint32 targetIndex = 0;
|
||||
|
||||
/* Allocate memory for targetValues and targetNulls arrays. */
|
||||
Datum *targetValues = (Datum *) palloc0(targetRelDesc->natts * sizeof(Datum));
|
||||
bool *targetNulls = (bool *) palloc0(targetRelDesc->natts * sizeof(bool));
|
||||
|
||||
/* Loop through all source and target attributes one by one and handle any dropped attributes.*/
|
||||
while (targetIndex < targetRelDesc->natts)
|
||||
{
|
||||
/* If this target attribute has been dropped, add a NULL attribute in targetValues and continue.*/
|
||||
if (TupleDescAttr(targetRelDesc, targetIndex)->attisdropped)
|
||||
{
|
||||
Datum nullDatum = (Datum) 0;
|
||||
targetValues[targetIndex] = nullDatum;
|
||||
targetNulls[targetIndex] = true;
|
||||
targetIndex++;
|
||||
}
|
||||
/* If this source attribute has been dropped, just skip this source attribute.*/
|
||||
else if (TupleDescAttr(sourceRelDesc, sourceIndex)->attisdropped)
|
||||
{
|
||||
sourceIndex++;
|
||||
continue;
|
||||
}
|
||||
/* If both source and target attributes are not dropped, add the attribute field to targetValues. */
|
||||
else if (sourceIndex < sourceRelDesc->natts)
|
||||
{
|
||||
targetValues[targetIndex] = sourceValues[sourceIndex];
|
||||
targetNulls[targetIndex] = sourceNulls[sourceIndex];
|
||||
sourceIndex++;
|
||||
targetIndex++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If there are no more source fields, add a NULL field in targetValues. */
|
||||
Datum nullDatum = (Datum) 0;
|
||||
targetValues[targetIndex] = nullDatum;
|
||||
targetNulls[targetIndex] = true;
|
||||
targetIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Form a new tuple from the target values created by the above loop. */
|
||||
HeapTuple targetRelationTuple = heap_form_tuple(targetRelDesc, targetValues,
|
||||
targetNulls);
|
||||
return targetRelationTuple;
|
||||
}
|
||||
|
||||
|
||||
/* HasSchemaChanged returns true if there are any schema changes between the source and target relations. */
|
||||
static bool
|
||||
HasSchemaChanged(TupleDesc sourceRelationDesc, TupleDesc targetRelationDesc)
|
||||
{
|
||||
bool hasSchemaChanged = (sourceRelationDesc->natts != targetRelationDesc->natts);
|
||||
if (hasSchemaChanged)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
for (uint32 i = 0; i < sourceRelationDesc->natts; i++)
|
||||
{
|
||||
if (TupleDescAttr(sourceRelationDesc, i)->attisdropped ||
|
||||
TupleDescAttr(targetRelationDesc, i)->attisdropped)
|
||||
{
|
||||
hasSchemaChanged = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return hasSchemaChanged;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TranslateChangesIfSchemaChanged translates the tuples in the ReorderBufferChange
|
||||
* if there is a schema change between source and target relations.
|
||||
*/
|
||||
static void
|
||||
TranslateChangesIfSchemaChanged(Relation sourceRelation, Relation targetRelation,
|
||||
ReorderBufferChange *change)
|
||||
{
|
||||
TupleDesc sourceRelationDesc = RelationGetDescr(sourceRelation);
|
||||
TupleDesc targetRelationDesc = RelationGetDescr(targetRelation);
|
||||
|
||||
/* if there are no changes between source and target relations, return. */
|
||||
if (!HasSchemaChanged(sourceRelationDesc, targetRelationDesc))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check the ReorderBufferChange's action type and handle them accordingly.*/
|
||||
switch (change->action)
|
||||
{
|
||||
case REORDER_BUFFER_CHANGE_INSERT:
|
||||
{
|
||||
/* For an insert, only the new tuple needs to be translated. */
|
||||
HeapTuple sourceRelationNewTuple = &(change->data.tp.newtuple->tuple);
|
||||
HeapTuple targetRelationNewTuple = GetTupleForTargetSchemaForCdc(
|
||||
sourceRelationNewTuple, sourceRelationDesc, targetRelationDesc);
|
||||
change->data.tp.newtuple->tuple = *targetRelationNewTuple;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* For update changes both old and new tuples need to be translated for target relation
|
||||
* if the REPLICA IDENTITY is set to FULL. Otherwise, only the new tuple needs to be
|
||||
* translated for target relation.
|
||||
*/
|
||||
case REORDER_BUFFER_CHANGE_UPDATE:
|
||||
{
|
||||
/* For an update, the new tuple always needs to be translated. */
|
||||
/* Get the new tuple from the ReorderBufferChange, and translate it to target relation. */
|
||||
HeapTuple sourceRelationNewTuple = &(change->data.tp.newtuple->tuple);
|
||||
HeapTuple targetRelationNewTuple = GetTupleForTargetSchemaForCdc(
|
||||
sourceRelationNewTuple, sourceRelationDesc, targetRelationDesc);
|
||||
change->data.tp.newtuple->tuple = *targetRelationNewTuple;
|
||||
|
||||
/*
|
||||
* Format oldtuple according to the target relation. If the column values of replica
|
||||
* identity change, then the old tuple is non-null and needs to be formatted according
|
||||
* to the target relation schema.
|
||||
*/
|
||||
if (change->data.tp.oldtuple != NULL)
|
||||
{
|
||||
HeapTuple sourceRelationOldTuple = &(change->data.tp.oldtuple->tuple);
|
||||
HeapTuple targetRelationOldTuple = GetTupleForTargetSchemaForCdc(
|
||||
sourceRelationOldTuple,
|
||||
sourceRelationDesc,
|
||||
targetRelationDesc);
|
||||
|
||||
change->data.tp.oldtuple->tuple = *targetRelationOldTuple;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case REORDER_BUFFER_CHANGE_DELETE:
|
||||
{
|
||||
/* For a delete, only the old tuple needs to be translated. */
|
||||
HeapTuple sourceRelationOldTuple = &(change->data.tp.oldtuple->tuple);
|
||||
HeapTuple targetRelationOldTuple = GetTupleForTargetSchemaForCdc(
|
||||
sourceRelationOldTuple,
|
||||
sourceRelationDesc,
|
||||
targetRelationDesc);
|
||||
|
||||
change->data.tp.oldtuple->tuple = *targetRelationOldTuple;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
/* Do nothing for other action types. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,432 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* cdc_decoder_utils.c
|
||||
* CDC Decoder plugin utility functions for Citus
|
||||
*
|
||||
* Copyright (c) Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
#include "commands/extension.h"
|
||||
#include "fmgr.h"
|
||||
#include "miscadmin.h"
|
||||
#include "access/genam.h"
|
||||
#include "access/heapam.h"
|
||||
#include "common/hashfn.h"
|
||||
#include "common/string.h"
|
||||
#include "utils/fmgroids.h"
|
||||
#include "utils/typcache.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "catalog/pg_namespace.h"
|
||||
#include "cdc_decoder_utils.h"
|
||||
#include "distributed/pg_dist_partition.h"
|
||||
#include "distributed/pg_dist_shard.h"
|
||||
#include "distributed/relay_utility.h"
|
||||
|
||||
static int32 LocalGroupId = -1;
|
||||
static Oid PgDistLocalGroupRelationId = InvalidOid;
|
||||
static Oid PgDistShardRelationId = InvalidOid;
|
||||
static Oid PgDistShardShardidIndexId = InvalidOid;
|
||||
static Oid PgDistPartitionRelationId = InvalidOid;
|
||||
static Oid PgDistPartitionLogicalrelidIndexId = InvalidOid;
|
||||
static bool IsCitusExtensionLoaded = false;
|
||||
|
||||
#define COORDINATOR_GROUP_ID 0
|
||||
#define InvalidRepOriginId 0
|
||||
#define Anum_pg_dist_local_groupid 1
|
||||
#define GROUP_ID_UPGRADING -2
|
||||
|
||||
|
||||
static Oid DistLocalGroupIdRelationId(void);
|
||||
static int32 CdcGetLocalGroupId(void);
|
||||
static HeapTuple CdcPgDistPartitionTupleViaCatalog(Oid relationId);
|
||||
|
||||
/*
|
||||
* DistLocalGroupIdRelationId returns the relation id of the pg_dist_local_group
|
||||
*/
|
||||
static Oid
|
||||
DistLocalGroupIdRelationId(void)
|
||||
{
|
||||
if (PgDistLocalGroupRelationId == InvalidOid)
|
||||
{
|
||||
PgDistLocalGroupRelationId = get_relname_relid("pg_dist_local_group",
|
||||
PG_CATALOG_NAMESPACE);
|
||||
}
|
||||
return PgDistLocalGroupRelationId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DistShardRelationId returns the relation id of the pg_dist_shard
|
||||
*/
|
||||
static Oid
|
||||
DistShardRelationId(void)
|
||||
{
|
||||
if (PgDistShardRelationId == InvalidOid)
|
||||
{
|
||||
PgDistShardRelationId = get_relname_relid("pg_dist_shard", PG_CATALOG_NAMESPACE);
|
||||
}
|
||||
return PgDistShardRelationId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DistShardShardidIndexId returns the index id of pg_dist_shard_shardid_index
|
||||
*/
|
||||
static Oid
|
||||
DistShardShardidIndexId(void)
|
||||
{
|
||||
if (PgDistShardShardidIndexId == InvalidOid)
|
||||
{
|
||||
PgDistShardShardidIndexId = get_relname_relid("pg_dist_shard_shardid_index",
|
||||
PG_CATALOG_NAMESPACE);
|
||||
}
|
||||
return PgDistShardShardidIndexId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DistPartitionRelationId returns the relation id of pg_dist_partition
|
||||
*/
|
||||
static Oid
|
||||
DistPartitionRelationId(void)
|
||||
{
|
||||
if (PgDistPartitionRelationId == InvalidOid)
|
||||
{
|
||||
PgDistPartitionRelationId = get_relname_relid("pg_dist_partition",
|
||||
PG_CATALOG_NAMESPACE);
|
||||
}
|
||||
return PgDistPartitionRelationId;
|
||||
}
|
||||
|
||||
|
||||
static Oid
|
||||
DistPartitionLogicalRelidIndexId(void)
|
||||
{
|
||||
if (PgDistPartitionLogicalrelidIndexId == InvalidOid)
|
||||
{
|
||||
PgDistPartitionLogicalrelidIndexId = get_relname_relid(
|
||||
"pg_dist_partition_logicalrelid_index", PG_CATALOG_NAMESPACE);
|
||||
}
|
||||
return PgDistPartitionLogicalrelidIndexId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CdcIsCoordinator function returns true if this node is identified as the
|
||||
* schema/coordinator/master node of the cluster.
|
||||
*/
|
||||
bool
|
||||
CdcIsCoordinator(void)
|
||||
{
|
||||
return (CdcGetLocalGroupId() == COORDINATOR_GROUP_ID);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CdcCitusHasBeenLoaded function returns true if the citus extension has been loaded.
|
||||
*/
|
||||
bool
|
||||
CdcCitusHasBeenLoaded()
|
||||
{
|
||||
if (!IsCitusExtensionLoaded)
|
||||
{
|
||||
IsCitusExtensionLoaded = (get_extension_oid("citus", true) != InvalidOid);
|
||||
}
|
||||
|
||||
return IsCitusExtensionLoaded;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CdcExtractShardIdFromTableName tries to extract the shard id from the given table name,
|
||||
* and returns the shard id if table name is formatted as shard name.
|
||||
* Else, the function returns INVALID_SHARD_ID.
|
||||
*/
|
||||
uint64
|
||||
CdcExtractShardIdFromTableName(const char *tableName, bool missingOk)
|
||||
{
|
||||
char *shardIdStringEnd = NULL;
|
||||
|
||||
/* find the last underscore and increment for shardId string */
|
||||
char *shardIdString = strrchr(tableName, SHARD_NAME_SEPARATOR);
|
||||
if (shardIdString == NULL && !missingOk)
|
||||
{
|
||||
ereport(ERROR, (errmsg("could not extract shardId from table name \"%s\"",
|
||||
tableName)));
|
||||
}
|
||||
else if (shardIdString == NULL && missingOk)
|
||||
{
|
||||
return INVALID_SHARD_ID;
|
||||
}
|
||||
|
||||
shardIdString++;
|
||||
|
||||
errno = 0;
|
||||
uint64 shardId = strtoull(shardIdString, &shardIdStringEnd, 0);
|
||||
|
||||
if (errno != 0 || (*shardIdStringEnd != '\0'))
|
||||
{
|
||||
if (!missingOk)
|
||||
{
|
||||
ereport(ERROR, (errmsg("could not extract shardId from table name \"%s\"",
|
||||
tableName)));
|
||||
}
|
||||
else
|
||||
{
|
||||
return INVALID_SHARD_ID;
|
||||
}
|
||||
}
|
||||
|
||||
return shardId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CdcGetLocalGroupId returns the group identifier of the local node. The function assumes
|
||||
* that pg_dist_local_group has exactly one row and has at least one column.
|
||||
* Otherwise, the function errors out.
|
||||
*/
|
||||
static int32
|
||||
CdcGetLocalGroupId(void)
|
||||
{
|
||||
ScanKeyData scanKey[1];
|
||||
int scanKeyCount = 0;
|
||||
int32 groupId = 0;
|
||||
|
||||
/*
|
||||
* Already set the group id, no need to read the heap again.
|
||||
*/
|
||||
if (LocalGroupId != -1)
|
||||
{
|
||||
return LocalGroupId;
|
||||
}
|
||||
|
||||
Oid localGroupTableOid = DistLocalGroupIdRelationId();
|
||||
if (localGroupTableOid == InvalidOid)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
Relation pgDistLocalGroupId = table_open(localGroupTableOid, AccessShareLock);
|
||||
|
||||
SysScanDesc scanDescriptor = systable_beginscan(pgDistLocalGroupId,
|
||||
InvalidOid, false,
|
||||
NULL, scanKeyCount, scanKey);
|
||||
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistLocalGroupId);
|
||||
|
||||
HeapTuple heapTuple = systable_getnext(scanDescriptor);
|
||||
|
||||
if (HeapTupleIsValid(heapTuple))
|
||||
{
|
||||
bool isNull = false;
|
||||
Datum groupIdDatum = heap_getattr(heapTuple,
|
||||
Anum_pg_dist_local_groupid,
|
||||
tupleDescriptor, &isNull);
|
||||
|
||||
groupId = DatumGetInt32(groupIdDatum);
|
||||
|
||||
/* set the local cache variable */
|
||||
LocalGroupId = groupId;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Upgrade is happening. When upgrading postgres, pg_dist_local_group is
|
||||
* temporarily empty before citus_finish_pg_upgrade() finishes execution.
|
||||
*/
|
||||
groupId = GROUP_ID_UPGRADING;
|
||||
}
|
||||
|
||||
systable_endscan(scanDescriptor);
|
||||
table_close(pgDistLocalGroupId, AccessShareLock);
|
||||
|
||||
return groupId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CdcLookupShardRelationFromCatalog returns the logical relation oid a shard belongs to.
|
||||
*
|
||||
* Errors out if the shardId does not exist and missingOk is false.
|
||||
* Returns InvalidOid if the shardId does not exist and missingOk is true.
|
||||
*/
|
||||
Oid
|
||||
CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk)
|
||||
{
|
||||
ScanKeyData scanKey[1];
|
||||
int scanKeyCount = 1;
|
||||
Form_pg_dist_shard shardForm = NULL;
|
||||
Relation pgDistShard = table_open(DistShardRelationId(), AccessShareLock);
|
||||
Oid relationId = InvalidOid;
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_pg_dist_shard_shardid,
|
||||
BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(shardId));
|
||||
|
||||
SysScanDesc scanDescriptor = systable_beginscan(pgDistShard,
|
||||
DistShardShardidIndexId(), true,
|
||||
NULL, scanKeyCount, scanKey);
|
||||
|
||||
HeapTuple heapTuple = systable_getnext(scanDescriptor);
|
||||
if (!HeapTupleIsValid(heapTuple) && !missingOk)
|
||||
{
|
||||
ereport(ERROR, (errmsg("could not find valid entry for shard "
|
||||
UINT64_FORMAT, shardId)));
|
||||
}
|
||||
|
||||
if (!HeapTupleIsValid(heapTuple))
|
||||
{
|
||||
relationId = InvalidOid;
|
||||
}
|
||||
else
|
||||
{
|
||||
shardForm = (Form_pg_dist_shard) GETSTRUCT(heapTuple);
|
||||
relationId = shardForm->logicalrelid;
|
||||
}
|
||||
|
||||
systable_endscan(scanDescriptor);
|
||||
table_close(pgDistShard, NoLock);
|
||||
|
||||
return relationId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CdcPgDistPartitionTupleViaCatalog is a helper function that searches
|
||||
* pg_dist_partition for the given relationId. The caller is responsible
|
||||
* for ensuring that the returned heap tuple is valid before accessing
|
||||
* its fields.
|
||||
*/
|
||||
static HeapTuple
|
||||
CdcPgDistPartitionTupleViaCatalog(Oid relationId)
|
||||
{
|
||||
const int scanKeyCount = 1;
|
||||
ScanKeyData scanKey[1];
|
||||
bool indexOK = true;
|
||||
|
||||
Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_pg_dist_partition_logicalrelid,
|
||||
BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));
|
||||
|
||||
SysScanDesc scanDescriptor = systable_beginscan(pgDistPartition,
|
||||
DistPartitionLogicalRelidIndexId(),
|
||||
indexOK, NULL, scanKeyCount, scanKey);
|
||||
|
||||
HeapTuple partitionTuple = systable_getnext(scanDescriptor);
|
||||
|
||||
if (HeapTupleIsValid(partitionTuple))
|
||||
{
|
||||
/* callers should have the tuple in their memory contexts */
|
||||
partitionTuple = heap_copytuple(partitionTuple);
|
||||
}
|
||||
|
||||
systable_endscan(scanDescriptor);
|
||||
table_close(pgDistPartition, AccessShareLock);
|
||||
|
||||
return partitionTuple;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CdcPartitionMethodViaCatalog gets a relationId and returns the partition
|
||||
* method column from pg_dist_partition via reading from catalog.
|
||||
*/
|
||||
char
|
||||
CdcPartitionMethodViaCatalog(Oid relationId)
|
||||
{
|
||||
HeapTuple partitionTuple = CdcPgDistPartitionTupleViaCatalog(relationId);
|
||||
if (!HeapTupleIsValid(partitionTuple))
|
||||
{
|
||||
return DISTRIBUTE_BY_INVALID;
|
||||
}
|
||||
|
||||
Datum datumArray[Natts_pg_dist_partition];
|
||||
bool isNullArray[Natts_pg_dist_partition];
|
||||
|
||||
Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);
|
||||
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
|
||||
heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);
|
||||
|
||||
if (isNullArray[Anum_pg_dist_partition_partmethod - 1])
|
||||
{
|
||||
/* partition method cannot be NULL, still let's make sure */
|
||||
heap_freetuple(partitionTuple);
|
||||
table_close(pgDistPartition, NoLock);
|
||||
return DISTRIBUTE_BY_INVALID;
|
||||
}
|
||||
|
||||
Datum partitionMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1];
|
||||
char partitionMethodChar = DatumGetChar(partitionMethodDatum);
|
||||
|
||||
heap_freetuple(partitionTuple);
|
||||
table_close(pgDistPartition, NoLock);
|
||||
|
||||
return partitionMethodChar;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RemoveCitusDecodersFromPaths removes a path ending in citus_decoders
|
||||
* from the given input paths.
|
||||
*/
|
||||
char *
|
||||
RemoveCitusDecodersFromPaths(char *paths)
|
||||
{
|
||||
if (strlen(paths) == 0)
|
||||
{
|
||||
/* dynamic_library_path is empty */
|
||||
return paths;
|
||||
}
|
||||
|
||||
StringInfo newPaths = makeStringInfo();
|
||||
|
||||
char *remainingPaths = paths;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int pathLength = 0;
|
||||
|
||||
char *pathStart = first_path_var_separator(remainingPaths);
|
||||
if (pathStart == remainingPaths)
|
||||
{
|
||||
/*
|
||||
* This will error out in find_in_dynamic_libpath, return
|
||||
* original value here.
|
||||
*/
|
||||
return paths;
|
||||
}
|
||||
else if (pathStart == NULL)
|
||||
{
|
||||
/* final path */
|
||||
pathLength = strlen(remainingPaths);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* more paths remaining */
|
||||
pathLength = pathStart - remainingPaths;
|
||||
}
|
||||
|
||||
char *currentPath = palloc(pathLength + 1);
|
||||
strlcpy(currentPath, remainingPaths, pathLength + 1);
|
||||
canonicalize_path(currentPath);
|
||||
|
||||
if (!pg_str_endswith(currentPath, "/citus_decoders"))
|
||||
{
|
||||
appendStringInfo(newPaths, "%s%s", newPaths->len > 0 ? ":" : "", currentPath);
|
||||
}
|
||||
|
||||
if (remainingPaths[pathLength] == '\0')
|
||||
{
|
||||
/* end of string */
|
||||
break;
|
||||
}
|
||||
|
||||
remainingPaths += pathLength + 1;
|
||||
}
|
||||
|
||||
return newPaths->data;
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* cdc_decoder_utils.h
|
||||
* Utility functions and declarations for the CDC decoder.
|
||||
*
|
||||
* Copyright (c) Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef CITUS_CDC_DECODER_H
|
||||
#define CITUS_CDC_DECODER_H
|
||||
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
#include "replication/logical.h"
|
||||
#include "c.h"
|
||||
|
||||
#define InvalidRepOriginId 0
|
||||
#define INVALID_SHARD_ID 0
|
||||
|
||||
bool CdcIsCoordinator(void);
|
||||
|
||||
uint64 CdcExtractShardIdFromTableName(const char *tableName, bool missingOk);
|
||||
|
||||
Oid CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk);
|
||||
|
||||
char CdcPartitionMethodViaCatalog(Oid relationId);
|
||||
|
||||
bool CdcCitusHasBeenLoaded(void);
|
||||
|
||||
char * RemoveCitusDecodersFromPaths(char *paths);
|
||||
|
||||
#endif /* CITUS_CDC_DECODER_H */
|
|
@ -55,6 +55,7 @@
|
|||
#include "distributed/multi_partitioning_utils.h"
|
||||
#include "distributed/reference_table_utils.h"
|
||||
#include "distributed/relation_access_tracking.h"
|
||||
#include "distributed/replication_origin_session_utils.h"
|
||||
#include "distributed/shared_library_init.h"
|
||||
#include "distributed/shard_utils.h"
|
||||
#include "distributed/worker_protocol.h"
|
||||
|
@ -183,6 +184,7 @@ static TableConversionReturn * AlterDistributedTable(TableConversionParameters *
|
|||
static TableConversionReturn * AlterTableSetAccessMethod(
|
||||
TableConversionParameters *params);
|
||||
static TableConversionReturn * ConvertTable(TableConversionState *con);
|
||||
static TableConversionReturn * ConvertTableInternal(TableConversionState *con);
|
||||
static bool SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName,
|
||||
char *longestShardName);
|
||||
static void DropIndexesNotSupportedByColumnar(Oid relationId,
|
||||
|
@ -215,7 +217,10 @@ static bool WillRecreateForeignKeyToReferenceTable(Oid relationId,
|
|||
CascadeToColocatedOption cascadeOption);
|
||||
static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId);
|
||||
static void ErrorIfUnsupportedCascadeObjects(Oid relationId);
|
||||
static List * WrapTableDDLCommands(List *commandStrings);
|
||||
static bool DoesCascadeDropUnsupportedObject(Oid classId, Oid id, HTAB *nodeMap);
|
||||
static TableConversionReturn * CopyTableConversionReturnIntoCurrentContext(
|
||||
TableConversionReturn *tableConversionReturn);
|
||||
|
||||
PG_FUNCTION_INFO_V1(undistribute_table);
|
||||
PG_FUNCTION_INFO_V1(alter_distributed_table);
|
||||
|
@ -402,7 +407,11 @@ UndistributeTable(TableConversionParameters *params)
|
|||
params->conversionType = UNDISTRIBUTE_TABLE;
|
||||
params->shardCountIsNull = true;
|
||||
TableConversionState *con = CreateTableConversion(params);
|
||||
return ConvertTable(con);
|
||||
|
||||
SetupReplicationOriginLocalSession();
|
||||
TableConversionReturn *conv = ConvertTable(con);
|
||||
ResetReplicationOriginLocalSession();
|
||||
return conv;
|
||||
}
|
||||
|
||||
|
||||
|
@ -441,6 +450,7 @@ AlterDistributedTable(TableConversionParameters *params)
|
|||
ereport(DEBUG1, (errmsg("setting multi shard modify mode to sequential")));
|
||||
SetLocalMultiShardModifyModeToSequential();
|
||||
}
|
||||
|
||||
return ConvertTable(con);
|
||||
}
|
||||
|
||||
|
@ -511,9 +521,9 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
|
|||
|
||||
|
||||
/*
|
||||
* ConvertTable is used for converting a table into a new table with different properties.
|
||||
* The conversion is done by creating a new table, moving everything to the new table and
|
||||
* dropping the old one. So the oid of the table is not preserved.
|
||||
* ConvertTableInternal is used for converting a table into a new table with different
|
||||
* properties. The conversion is done by creating a new table, moving everything to the
|
||||
* new table and dropping the old one. So the oid of the table is not preserved.
|
||||
*
|
||||
* The new table will have the same name, columns and rows. It will also have partitions,
|
||||
* views, sequences of the old table. Finally it will have everything created by
|
||||
|
@ -532,7 +542,7 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
|
|||
* in case you add a new way to return from this function.
|
||||
*/
|
||||
TableConversionReturn *
|
||||
ConvertTable(TableConversionState *con)
|
||||
ConvertTableInternal(TableConversionState *con)
|
||||
{
|
||||
InTableTypeConversionFunctionCall = true;
|
||||
|
||||
|
@ -595,9 +605,18 @@ ConvertTable(TableConversionState *con)
|
|||
List *justBeforeDropCommands = NIL;
|
||||
List *attachPartitionCommands = NIL;
|
||||
|
||||
postLoadCommands =
|
||||
list_concat(postLoadCommands,
|
||||
GetViewCreationTableDDLCommandsOfTable(con->relationId));
|
||||
List *createViewCommands = GetViewCreationCommandsOfTable(con->relationId);
|
||||
|
||||
postLoadCommands = list_concat(postLoadCommands,
|
||||
WrapTableDDLCommands(createViewCommands));
|
||||
|
||||
/* need to add back to publications after dropping the original table */
|
||||
bool isAdd = true;
|
||||
List *alterPublicationCommands =
|
||||
GetAlterPublicationDDLCommandsForTable(con->relationId, isAdd);
|
||||
|
||||
postLoadCommands = list_concat(postLoadCommands,
|
||||
WrapTableDDLCommands(alterPublicationCommands));
|
||||
|
||||
List *foreignKeyCommands = NIL;
|
||||
if (con->conversionType == ALTER_DISTRIBUTED_TABLE)
|
||||
|
@ -800,9 +819,21 @@ ConvertTable(TableConversionState *con)
|
|||
ExecuteQueryViaSPI(tableConstructionSQL, SPI_OK_UTILITY);
|
||||
}
|
||||
|
||||
/*
|
||||
* when there are many partitions, each call to ProcessUtilityParseTree
|
||||
* accumulates used memory. Free context after each call.
|
||||
*/
|
||||
MemoryContext citusPerPartitionContext =
|
||||
AllocSetContextCreate(CurrentMemoryContext,
|
||||
"citus_per_partition_context",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(citusPerPartitionContext);
|
||||
|
||||
char *attachPartitionCommand = NULL;
|
||||
foreach_ptr(attachPartitionCommand, attachPartitionCommands)
|
||||
{
|
||||
MemoryContextReset(citusPerPartitionContext);
|
||||
|
||||
Node *parseTree = ParseTreeNode(attachPartitionCommand);
|
||||
|
||||
ProcessUtilityParseTree(parseTree, attachPartitionCommand,
|
||||
|
@ -810,6 +841,9 @@ ConvertTable(TableConversionState *con)
|
|||
NULL, None_Receiver, NULL);
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
MemoryContextDelete(citusPerPartitionContext);
|
||||
|
||||
if (isPartitionTable)
|
||||
{
|
||||
ExecuteQueryViaSPI(attachToParentCommand, SPI_OK_UTILITY);
|
||||
|
@ -869,10 +903,77 @@ ConvertTable(TableConversionState *con)
|
|||
SetLocalEnableLocalReferenceForeignKeys(oldEnableLocalReferenceForeignKeys);
|
||||
|
||||
InTableTypeConversionFunctionCall = false;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/*
 * CopyTableConversionReturnIntoCurrentContext copies given tableConversionReturn
 * into CurrentMemoryContext.
 */
static TableConversionReturn *
CopyTableConversionReturnIntoCurrentContext(TableConversionReturn *tableConversionReturn)
{
	TableConversionReturn *tableConversionReturnCopy = NULL;
	if (tableConversionReturn)
	{
		tableConversionReturnCopy = palloc0(sizeof(TableConversionReturn));
		List *copyForeignKeyCommands = NIL;
		char *foreignKeyCommand = NULL;
		foreach_ptr(foreignKeyCommand, tableConversionReturn->foreignKeyCommands)
		{
			char *copyForeignKeyCommand = MemoryContextStrdup(CurrentMemoryContext,
															  foreignKeyCommand);
			copyForeignKeyCommands = lappend(copyForeignKeyCommands,
											 copyForeignKeyCommand);
		}
		tableConversionReturnCopy->foreignKeyCommands = copyForeignKeyCommands;
	}

	return tableConversionReturnCopy;
}


/*
 * ConvertTable is a wrapper for ConvertTableInternal to persist only
 * TableConversionReturn and delete all other allocations.
 */
static TableConversionReturn *
ConvertTable(TableConversionState *con)
{
	/*
	 * We do not allow alter_distributed_table and undistribute_table operations
	 * for tables with identity columns. This is because we do not have a proper way
	 * of keeping sequence states consistent across the cluster.
	 */
	ErrorIfTableHasIdentityColumn(con->relationId);

	/*
	 * when there are many partitions or colocated tables, memory usage is
	 * accumulated. Free context for each call to ConvertTable.
	 */
	MemoryContext convertTableContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "citus_convert_table_context",
							  ALLOCSET_DEFAULT_SIZES);
	MemoryContext oldContext = MemoryContextSwitchTo(convertTableContext);

	TableConversionReturn *tableConversionReturn = ConvertTableInternal(con);

	MemoryContextSwitchTo(oldContext);

	/* persist TableConversionReturn in oldContext */
	TableConversionReturn *tableConversionReturnCopy =
		CopyTableConversionReturnIntoCurrentContext(tableConversionReturn);

	/* delete convertTableContext */
	MemoryContextDelete(convertTableContext);

	return tableConversionReturnCopy;
}
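The wrapper above is an instance of a general PostgreSQL memory management pattern: do the heavy lifting inside a short-lived memory context, copy only what must survive back into the caller's context, and delete the context to release everything else at once. A minimal sketch of that pattern, assuming the usual postgres.h and utils/memutils.h headers; DoHeavyWork is a hypothetical helper, not a Citus function:

/*
 * Sketch only: run hypothetical DoHeavyWork() in a throwaway context and
 * keep just its string result in the caller's memory context.
 */
static char *
RunInThrowawayContext(void)
{
	MemoryContext workContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "throwaway_work_context",
							  ALLOCSET_DEFAULT_SIZES);
	MemoryContext oldContext = MemoryContextSwitchTo(workContext);

	/* all allocations made here land in workContext */
	char *transientResult = DoHeavyWork();

	MemoryContextSwitchTo(oldContext);

	/* copy the one thing we want to keep into the caller's context */
	char *persistentResult = MemoryContextStrdup(CurrentMemoryContext,
												 transientResult);

	/* releases everything DoHeavyWork() allocated, in one shot */
	MemoryContextDelete(workContext);

	return persistentResult;
}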
|
||||
|
||||
|
||||
/*
* DropIndexesNotSupportedByColumnar is a helper function used during access
* method conversion to drop the indexes that are not supported by columnarAM.
|
||||
|
@ -1268,8 +1369,7 @@ CreateCitusTableLike(TableConversionState *con)
|
|||
}
|
||||
else if (IsCitusTableType(con->relationId, REFERENCE_TABLE))
|
||||
{
|
||||
CreateDistributedTable(con->newRelationId, NULL, DISTRIBUTE_BY_NONE, 0, false,
|
||||
NULL);
|
||||
CreateReferenceTable(con->newRelationId);
|
||||
}
|
||||
else if (IsCitusTableType(con->relationId, CITUS_LOCAL_TABLE))
|
||||
{
|
||||
|
@ -1410,17 +1510,16 @@ GetViewCreationCommandsOfTable(Oid relationId)
|
|||
|
||||
|
||||
/*
|
||||
* GetViewCreationTableDDLCommandsOfTable is the same as GetViewCreationCommandsOfTable,
|
||||
* but the returned list includes objects of TableDDLCommand's, not strings.
|
||||
* WrapTableDDLCommands takes a list of command strings and wraps them
|
||||
* in TableDDLCommand structs.
|
||||
*/
|
||||
List *
|
||||
GetViewCreationTableDDLCommandsOfTable(Oid relationId)
|
||||
static List *
|
||||
WrapTableDDLCommands(List *commandStrings)
|
||||
{
|
||||
List *commands = GetViewCreationCommandsOfTable(relationId);
|
||||
List *tableDDLCommands = NIL;
|
||||
|
||||
char *command = NULL;
|
||||
foreach_ptr(command, commands)
|
||||
foreach_ptr(command, commandStrings)
|
||||
{
|
||||
tableDDLCommands = lappend(tableDDLCommands, makeTableDDLCommandString(command));
|
||||
}
|
||||
|
@ -1523,96 +1622,6 @@ CreateMaterializedViewDDLCommand(Oid matViewOid)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* This function marks all the identity sequences as distributed on the given table.
|
||||
*/
|
||||
static void
|
||||
MarkIdentitiesAsDistributed(Oid targetRelationId)
|
||||
{
|
||||
Relation relation = relation_open(targetRelationId, AccessShareLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(relation);
|
||||
relation_close(relation, NoLock);
|
||||
|
||||
bool missingSequenceOk = false;
|
||||
|
||||
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
|
||||
attributeIndex++)
|
||||
{
|
||||
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
|
||||
|
||||
if (attributeForm->attidentity)
|
||||
{
|
||||
Oid seqOid = getIdentitySequence(targetRelationId, attributeForm->attnum,
|
||||
missingSequenceOk);
|
||||
|
||||
ObjectAddress seqAddress = { 0 };
|
||||
ObjectAddressSet(seqAddress, RelationRelationId, seqOid);
|
||||
MarkObjectDistributed(&seqAddress);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
* PrepareRenameIdentitiesCommands prepares the SQL statements needed to rename
* the identity sequences of the given table, separately for the coordinator
* and the workers.
*/
|
||||
static void
|
||||
PrepareRenameIdentitiesCommands(Oid sourceRelationId, Oid targetRelationId,
|
||||
List **outCoordinatorCommands, List **outWorkerCommands)
|
||||
{
|
||||
Relation targetRelation = relation_open(targetRelationId, AccessShareLock);
|
||||
TupleDesc targetTupleDescriptor = RelationGetDescr(targetRelation);
|
||||
relation_close(targetRelation, NoLock);
|
||||
|
||||
bool missingSequenceOk = false;
|
||||
|
||||
for (int attributeIndex = 0; attributeIndex < targetTupleDescriptor->natts;
|
||||
attributeIndex++)
|
||||
{
|
||||
Form_pg_attribute attributeForm = TupleDescAttr(targetTupleDescriptor,
|
||||
attributeIndex);
|
||||
|
||||
if (attributeForm->attidentity)
|
||||
{
|
||||
char *columnName = NameStr(attributeForm->attname);
|
||||
|
||||
Oid targetSequenceOid = getIdentitySequence(targetRelationId,
|
||||
attributeForm->attnum,
|
||||
missingSequenceOk);
|
||||
char *targetSequenceName = generate_relation_name(targetSequenceOid, NIL);
|
||||
|
||||
Oid sourceSequenceOid = getIdentitySequence(sourceRelationId,
|
||||
attributeForm->attnum,
|
||||
missingSequenceOk);
|
||||
char *sourceSequenceName = generate_relation_name(sourceSequenceOid, NIL);
|
||||
|
||||
/* to rename sequence on the coordinator */
|
||||
*outCoordinatorCommands = lappend(*outCoordinatorCommands, psprintf(
|
||||
"SET citus.enable_ddl_propagation TO OFF; ALTER SEQUENCE %s RENAME TO %s; RESET citus.enable_ddl_propagation;",
|
||||
quote_identifier(
|
||||
targetSequenceName),
|
||||
quote_identifier(
|
||||
sourceSequenceName)));
|
||||
|
||||
/* update workers to use existing sequence and drop the new one generated by PG */
|
||||
bool missingTableOk = true;
|
||||
*outWorkerCommands = lappend(*outWorkerCommands,
|
||||
GetAlterColumnWithNextvalDefaultCmd(
|
||||
sourceSequenceOid, sourceRelationId,
|
||||
columnName,
|
||||
missingTableOk));
|
||||
|
||||
|
||||
/* drop the sequence generated by identity column */
|
||||
*outWorkerCommands = lappend(*outWorkerCommands, psprintf(
|
||||
"DROP SEQUENCE IF EXISTS %s",
|
||||
quote_identifier(
|
||||
targetSequenceName)));
|
||||
}
|
||||
}
|
||||
}
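For context, the coordinator-side command that the loop above builds is a plain ALTER SEQUENCE ... RENAME wrapped in a citus.enable_ddl_propagation toggle. The following fragment only re-creates one such command with made-up sequence names; psprintf and quote_identifier are standard PostgreSQL helpers:

/* Illustrative fragment: one coordinator rename command (names are made up). */
const char *targetSequenceName = "orders_new_id_seq";
const char *sourceSequenceName = "orders_id_seq";

char *renameCommand = psprintf(
	"SET citus.enable_ddl_propagation TO OFF; "
	"ALTER SEQUENCE %s RENAME TO %s; "
	"RESET citus.enable_ddl_propagation;",
	quote_identifier(targetSequenceName),
	quote_identifier(sourceSequenceName));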
|
||||
|
||||
|
||||
/*
|
||||
* ReplaceTable replaces the source table with the target table.
|
||||
* It moves all the rows of the source table to target table with INSERT SELECT.
|
||||
|
@ -1671,24 +1680,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
|
|||
ExecuteQueryViaSPI(query->data, SPI_OK_INSERT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop identity dependencies (sequences marked as DEPENDENCY_INTERNAL) on the workers
|
||||
* to keep their states after the source table is dropped.
|
||||
*/
|
||||
List *ownedIdentitySequences = getOwnedSequences_internal(sourceId, 0,
|
||||
DEPENDENCY_INTERNAL);
|
||||
if (ownedIdentitySequences != NIL && ShouldSyncTableMetadata(sourceId))
|
||||
{
|
||||
char *qualifiedTableName = quote_qualified_identifier(schemaName, sourceName);
|
||||
StringInfo command = makeStringInfo();
|
||||
|
||||
appendStringInfo(command,
|
||||
"SELECT pg_catalog.worker_drop_sequence_dependency(%s);",
|
||||
quote_literal_cstr(qualifiedTableName));
|
||||
|
||||
SendCommandToWorkersWithMetadata(command->data);
|
||||
}
|
||||
|
||||
/*
|
||||
* Modify regular sequence dependencies (sequences marked as DEPENDENCY_AUTO)
|
||||
*/
|
||||
|
@ -1748,23 +1739,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
|
|||
quote_qualified_identifier(schemaName, sourceName))));
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to prepare rename identities commands before dropping the original table,
|
||||
* otherwise we can't find the original names of the identity sequences.
|
||||
* We prepare separate commands for the coordinator and the workers because:
|
||||
* In the coordinator, we simply need to rename the identity sequences
|
||||
* to their names on the old table, because right now the identity
|
||||
* sequences have default names generated by Postgres with the creation of the new table
|
||||
* In the workers, we have not dropped the original identity sequences,
|
||||
* so what we do is we alter the columns and set their default to the
|
||||
* original identity sequences, and after that we drop the new sequences.
|
||||
*/
|
||||
List *coordinatorCommandsToRenameIdentites = NIL;
|
||||
List *workerCommandsToRenameIdentites = NIL;
|
||||
PrepareRenameIdentitiesCommands(sourceId, targetId,
|
||||
&coordinatorCommandsToRenameIdentites,
|
||||
&workerCommandsToRenameIdentites);
|
||||
|
||||
resetStringInfo(query);
|
||||
appendStringInfo(query, "DROP %sTABLE %s CASCADE",
|
||||
IsForeignTable(sourceId) ? "FOREIGN " : "",
|
||||
|
@ -1782,27 +1756,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
|
|||
quote_qualified_identifier(schemaName, targetName),
|
||||
quote_identifier(sourceName));
|
||||
ExecuteQueryViaSPI(query->data, SPI_OK_UTILITY);
|
||||
|
||||
char *coordinatorCommand = NULL;
|
||||
foreach_ptr(coordinatorCommand, coordinatorCommandsToRenameIdentites)
|
||||
{
|
||||
ExecuteQueryViaSPI(coordinatorCommand, SPI_OK_UTILITY);
|
||||
}
|
||||
|
||||
char *workerCommand = NULL;
|
||||
foreach_ptr(workerCommand, workerCommandsToRenameIdentites)
|
||||
{
|
||||
SendCommandToWorkersWithMetadata(workerCommand);
|
||||
}
|
||||
|
||||
/*
|
||||
* To preserve identity sequences states in case of redistributing the table again,
|
||||
* we don't drop them when we undistribute a table. To maintain consistency and
|
||||
* avoid future problems if we redistribute the table, we want to apply all changes happening to
|
||||
* the identity sequence in the coordinator to their corresponding sequences in the workers as well.
|
||||
* That's why we have to mark identity sequences as distributed
|
||||
*/
|
||||
MarkIdentitiesAsDistributed(targetId);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -85,6 +85,7 @@ static void DropRelationTruncateTriggers(Oid relationId);
|
|||
static char * GetDropTriggerCommand(Oid relationId, char *triggerName);
|
||||
static void DropViewsOnTable(Oid relationId);
|
||||
static void DropIdentitiesOnTable(Oid relationId);
|
||||
static void DropTableFromPublications(Oid relationId);
|
||||
static List * GetRenameStatsCommandList(List *statsOidList, uint64 shardId);
|
||||
static List * ReversedOidList(List *oidList);
|
||||
static void AppendExplicitIndexIdsToList(Form_pg_index indexForm,
|
||||
|
@ -338,6 +339,10 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
|
|||
List *shellTableDDLEvents = GetShellTableDDLEventsForCitusLocalTable(relationId);
|
||||
List *tableViewCreationCommands = GetViewCreationCommandsOfTable(relationId);
|
||||
|
||||
bool isAdd = true;
|
||||
List *alterPublicationCommands =
|
||||
GetAlterPublicationDDLCommandsForTable(relationId, isAdd);
|
||||
|
||||
char *relationName = get_rel_name(relationId);
|
||||
Oid relationSchemaId = get_rel_namespace(relationId);
|
||||
|
||||
|
@ -347,6 +352,12 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
|
|||
*/
|
||||
DropIdentitiesOnTable(relationId);
|
||||
|
||||
/*
|
||||
* We do not want the shard to be in the publication (subscribers are
|
||||
* unlikely to recognize it).
|
||||
*/
|
||||
DropTableFromPublications(relationId);
|
||||
|
||||
/* below we convert relation with relationId to the shard relation */
|
||||
uint64 shardId = ConvertLocalTableToShard(relationId);
|
||||
|
||||
|
@ -363,6 +374,11 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
|
|||
*/
|
||||
ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(tableViewCreationCommands);
|
||||
|
||||
/*
|
||||
* Execute the publication creation commands with the shell table.
|
||||
*/
|
||||
ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(alterPublicationCommands);
|
||||
|
||||
/*
|
||||
* Set shellRelationId as the relation with relationId now points
|
||||
* to the shard relation.
|
||||
|
@ -1131,7 +1147,7 @@ DropIdentitiesOnTable(Oid relationId)
|
|||
{
|
||||
Relation relation = relation_open(relationId, AccessShareLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(relation);
|
||||
relation_close(relation, NoLock);
|
||||
List *dropCommandList = NIL;
|
||||
|
||||
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
|
||||
attributeIndex++)
|
||||
|
@ -1151,15 +1167,38 @@ DropIdentitiesOnTable(Oid relationId)
|
|||
qualifiedTableName,
|
||||
columnName);
|
||||
|
||||
dropCommandList = lappend(dropCommandList, dropCommand->data);
|
||||
}
|
||||
}
|
||||
|
||||
relation_close(relation, NoLock);
|
||||
|
||||
char *dropCommand = NULL;
|
||||
foreach_ptr(dropCommand, dropCommandList)
|
||||
{
|
||||
/*
* We need to disable/enable DDL propagation for this command to prevent
* sending unnecessary ALTER COLUMN commands for partitions to MX workers.
*/
|
||||
ExecuteAndLogUtilityCommandList(list_make3(DISABLE_DDL_PROPAGATION,
|
||||
dropCommand->data,
|
||||
dropCommand,
|
||||
ENABLE_DDL_PROPAGATION));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DropTableFromPublications drops the table from all of its publications.
|
||||
*/
|
||||
static void
|
||||
DropTableFromPublications(Oid relationId)
|
||||
{
|
||||
bool isAdd = false;
|
||||
|
||||
List *alterPublicationCommands =
|
||||
GetAlterPublicationDDLCommandsForTable(relationId, isAdd);
|
||||
|
||||
ExecuteAndLogUtilityCommandList(alterPublicationCommands);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -94,6 +94,28 @@
#include "utils/syscache.h"
#include "utils/inval.h"


/* common params that apply to all Citus table types */
typedef struct
{
	char distributionMethod;
	char replicationModel;
} CitusTableParams;


/*
 * Params that only apply to distributed tables, i.e., the ones that are
 * known as DISTRIBUTED_TABLE by Citus metadata.
 */
typedef struct
{
	int shardCount;
	bool shardCountIsStrict;
	char *colocateWithTableName;
	char *distributionColumnName;
} DistributedTableParams;
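To make the split concrete: a caller that wants a hash-distributed table fills a DistributedTableParams and hands it, together with the matching CitusTableType, to the CreateCitusTable entry point introduced further down in this diff. A sketch with placeholder values (the relation OID, shard count, and column name are illustrative only):

/* Sketch: parameters for a hypothetical hash-distributed table. */
Oid relationId = 16384;    /* placeholder OID */

DistributedTableParams distributedTableParams = {
	.colocateWithTableName = "default",
	.shardCount = 32,
	.shardCountIsStrict = false,
	.distributionColumnName = "tenant_id"
};

/* distributed types require non-NULL params; reference tables pass NULL instead */
CreateCitusTable(relationId, HASH_DISTRIBUTED, &distributedTableParams);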
|
||||
|
||||
|
||||
/*
|
||||
* once every LOG_PER_TUPLE_AMOUNT, the copy will be logged.
|
||||
*/
|
||||
|
@ -106,17 +128,22 @@ static void CreateDistributedTableConcurrently(Oid relationId,
|
|||
char *colocateWithTableName,
|
||||
int shardCount,
|
||||
bool shardCountIsStrict);
|
||||
static char DecideReplicationModel(char distributionMethod, char *colocateWithTableName);
|
||||
static char DecideDistTableReplicationModel(char distributionMethod,
|
||||
char *colocateWithTableName);
|
||||
static List * HashSplitPointsForShardList(List *shardList);
|
||||
static List * HashSplitPointsForShardCount(int shardCount);
|
||||
static List * WorkerNodesForShardList(List *shardList);
|
||||
static List * RoundRobinWorkerNodeList(List *workerNodeList, int listLength);
|
||||
static CitusTableParams DecideCitusTableParams(CitusTableType tableType,
|
||||
DistributedTableParams *
|
||||
distributedTableParams);
|
||||
static void CreateCitusTable(Oid relationId, CitusTableType tableType,
|
||||
DistributedTableParams *distributedTableParams);
|
||||
static void CreateHashDistributedTableShards(Oid relationId, int shardCount,
|
||||
Oid colocatedTableId, bool localTableEmpty);
|
||||
static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
|
||||
char distributionMethod, char replicationModel,
|
||||
int shardCount, bool shardCountIsStrict,
|
||||
char *colocateWithTableName);
|
||||
static uint32 ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
|
||||
DistributedTableParams *distributedTableParams,
|
||||
Var *distributionColumn);
|
||||
static void EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
|
||||
char distributionMethod, uint32 colocationId,
|
||||
char replicationModel);
|
||||
|
@ -377,7 +404,7 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName,
|
|||
|
||||
EnsureForeignKeysForDistributedTableConcurrently(relationId);
|
||||
|
||||
char replicationModel = DecideReplicationModel(distributionMethod,
|
||||
char replicationModel = DecideDistTableReplicationModel(distributionMethod,
|
||||
colocateWithTableName);
|
||||
|
||||
/*
|
||||
|
@ -622,7 +649,7 @@ static void
|
|||
EnsureColocateWithTableIsValid(Oid relationId, char distributionMethod,
|
||||
char *distributionColumnName, char *colocateWithTableName)
|
||||
{
|
||||
char replicationModel = DecideReplicationModel(distributionMethod,
|
||||
char replicationModel = DecideDistTableReplicationModel(distributionMethod,
|
||||
colocateWithTableName);
|
||||
|
||||
/*
|
||||
|
@ -860,9 +887,6 @@ create_reference_table(PG_FUNCTION_ARGS)
|
|||
CheckCitusVersion(ERROR);
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
char *colocateWithTableName = NULL;
|
||||
char *distributionColumnName = NULL;
|
||||
|
||||
EnsureCitusTableCanBeCreated(relationId);
|
||||
|
||||
/* enable create_reference_table on an empty node */
|
||||
|
@ -895,8 +919,7 @@ create_reference_table(PG_FUNCTION_ARGS)
|
|||
errdetail("There are no active worker nodes.")));
|
||||
}
|
||||
|
||||
CreateDistributedTable(relationId, distributionColumnName, DISTRIBUTE_BY_NONE,
|
||||
ShardCount, false, colocateWithTableName);
|
||||
CreateReferenceTable(relationId);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
@ -951,18 +974,90 @@ EnsureRelationExists(Oid relationId)
|
|||
|
||||
|
||||
/*
|
||||
* CreateDistributedTable creates distributed table in the given configuration.
|
||||
* CreateDistributedTable is a wrapper around CreateCitusTable that creates a
* distributed table.
|
||||
*/
|
||||
void
|
||||
CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
||||
char distributionMethod,
|
||||
int shardCount, bool shardCountIsStrict,
|
||||
char *colocateWithTableName)
|
||||
{
|
||||
CitusTableType tableType;
|
||||
switch (distributionMethod)
|
||||
{
|
||||
case DISTRIBUTE_BY_HASH:
|
||||
{
|
||||
tableType = HASH_DISTRIBUTED;
|
||||
break;
|
||||
}
|
||||
|
||||
case DISTRIBUTE_BY_APPEND:
|
||||
{
|
||||
tableType = APPEND_DISTRIBUTED;
|
||||
break;
|
||||
}
|
||||
|
||||
case DISTRIBUTE_BY_RANGE:
|
||||
{
|
||||
tableType = RANGE_DISTRIBUTED;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
ereport(ERROR, (errmsg("unexpected distribution method when "
|
||||
"deciding Citus table type")));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
DistributedTableParams distributedTableParams = {
|
||||
.colocateWithTableName = colocateWithTableName,
|
||||
.shardCount = shardCount,
|
||||
.shardCountIsStrict = shardCountIsStrict,
|
||||
.distributionColumnName = distributionColumnName
|
||||
};
|
||||
CreateCitusTable(relationId, tableType, &distributedTableParams);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreateReferenceTable is a wrapper around CreateCitusTable that creates a
|
||||
* reference table.
|
||||
*/
|
||||
void
|
||||
CreateReferenceTable(Oid relationId)
|
||||
{
|
||||
CreateCitusTable(relationId, REFERENCE_TABLE, NULL);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreateCitusTable is the internal method that creates a Citus table in
|
||||
* given configuration.
|
||||
*
|
||||
* DistributedTableParams should be non-null only if we're creating a distributed
|
||||
* table.
|
||||
*
|
||||
* This function contains all the necessary logic to create distributed tables. It
|
||||
* performs necessary checks to ensure distributing the table is safe. If it is
|
||||
* safe to distribute the table, this function creates distributed table metadata,
|
||||
* creates shards and copies local data to shards. This function also handles
|
||||
* partitioned tables by distributing their partitions as well.
|
||||
*/
|
||||
void
|
||||
CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
||||
char distributionMethod, int shardCount,
|
||||
bool shardCountIsStrict, char *colocateWithTableName)
|
||||
static void
|
||||
CreateCitusTable(Oid relationId, CitusTableType tableType,
|
||||
DistributedTableParams *distributedTableParams)
|
||||
{
|
||||
if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED ||
|
||||
tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL))
|
||||
{
|
||||
ereport(ERROR, (errmsg("distributed table params must be provided "
|
||||
"when creating a distributed table and must "
|
||||
"not be otherwise")));
|
||||
}
|
||||
|
||||
/*
|
||||
* EnsureTableNotDistributed errors out when relation is a citus table but
|
||||
* we don't want to ask user to first undistribute their citus local tables
|
||||
|
@ -988,11 +1083,8 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
|||
* that ALTER TABLE hook does the necessary job, which means converting
|
||||
* local tables to citus local tables to properly support such foreign
|
||||
* keys.
|
||||
*
|
||||
* This function does not expect to create Citus local table, so we blindly
|
||||
* create reference table when the method is DISTRIBUTE_BY_NONE.
|
||||
*/
|
||||
else if (distributionMethod == DISTRIBUTE_BY_NONE &&
|
||||
else if (tableType == REFERENCE_TABLE &&
|
||||
ShouldEnableLocalReferenceForeignKeys() &&
|
||||
HasForeignKeyWithLocalTable(relationId))
|
||||
{
|
||||
|
@ -1022,24 +1114,29 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
|||
|
||||
PropagatePrerequisiteObjectsForDistributedTable(relationId);
|
||||
|
||||
char replicationModel = DecideReplicationModel(distributionMethod,
|
||||
colocateWithTableName);
|
||||
|
||||
Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId,
|
||||
Var *distributionColumn = NULL;
|
||||
if (distributedTableParams)
|
||||
{
|
||||
distributionColumn = BuildDistributionKeyFromColumnName(relationId,
|
||||
distributedTableParams->
|
||||
distributionColumnName,
|
||||
NoLock);
|
||||
}
|
||||
|
||||
CitusTableParams citusTableParams = DecideCitusTableParams(tableType,
|
||||
distributedTableParams);
|
||||
|
||||
/*
|
||||
* ColocationIdForNewTable assumes caller acquires lock on relationId. In our case,
|
||||
* our caller already acquired lock on relationId.
|
||||
*/
|
||||
uint32 colocationId = ColocationIdForNewTable(relationId, distributionColumn,
|
||||
distributionMethod, replicationModel,
|
||||
shardCount, shardCountIsStrict,
|
||||
colocateWithTableName);
|
||||
uint32 colocationId = ColocationIdForNewTable(relationId, tableType,
|
||||
distributedTableParams,
|
||||
distributionColumn);
|
||||
|
||||
EnsureRelationCanBeDistributed(relationId, distributionColumn, distributionMethod,
|
||||
colocationId, replicationModel);
|
||||
EnsureRelationCanBeDistributed(relationId, distributionColumn,
|
||||
citusTableParams.distributionMethod,
|
||||
colocationId, citusTableParams.replicationModel);
|
||||
|
||||
/*
|
||||
* Make sure that existing reference tables have been replicated to all the nodes
|
||||
|
@ -1068,8 +1165,10 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
|||
bool autoConverted = false;
|
||||
|
||||
/* create an entry for distributed table in pg_dist_partition */
|
||||
InsertIntoPgDistPartition(relationId, distributionMethod, distributionColumn,
|
||||
colocationId, replicationModel, autoConverted);
|
||||
InsertIntoPgDistPartition(relationId, citusTableParams.distributionMethod,
|
||||
distributionColumn,
|
||||
colocationId, citusTableParams.replicationModel,
|
||||
autoConverted);
|
||||
|
||||
/* foreign tables do not support TRUNCATE trigger */
|
||||
if (RegularTable(relationId))
|
||||
|
@ -1078,17 +1177,14 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
|||
}
|
||||
|
||||
/* create shards for hash distributed and reference tables */
|
||||
if (distributionMethod == DISTRIBUTE_BY_HASH)
|
||||
if (tableType == HASH_DISTRIBUTED)
|
||||
{
|
||||
CreateHashDistributedTableShards(relationId, shardCount, colocatedTableId,
|
||||
CreateHashDistributedTableShards(relationId, distributedTableParams->shardCount,
|
||||
colocatedTableId,
|
||||
localTableEmpty);
|
||||
}
|
||||
else if (distributionMethod == DISTRIBUTE_BY_NONE)
|
||||
else if (tableType == REFERENCE_TABLE)
|
||||
{
|
||||
/*
|
||||
* This function does not expect to create Citus local table, so we blindly
|
||||
* create reference table when the method is DISTRIBUTE_BY_NONE.
|
||||
*/
|
||||
CreateReferenceTableShard(relationId);
|
||||
}
|
||||
|
||||
|
@ -1116,17 +1212,36 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
|||
char *relationName = get_rel_name(relationId);
|
||||
char *parentRelationName = quote_qualified_identifier(schemaName, relationName);
|
||||
|
||||
/*
|
||||
* when there are many partitions, each call to CreateDistributedTable
|
||||
* accumulates used memory. Create and free context for each call.
|
||||
*/
|
||||
MemoryContext citusPartitionContext =
|
||||
AllocSetContextCreate(CurrentMemoryContext,
|
||||
"citus_per_partition_context",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(citusPartitionContext);
|
||||
|
||||
foreach_oid(partitionRelationId, partitionList)
|
||||
{
|
||||
CreateDistributedTable(partitionRelationId, distributionColumnName,
|
||||
distributionMethod, shardCount, false,
|
||||
parentRelationName);
|
||||
MemoryContextReset(citusPartitionContext);
|
||||
|
||||
DistributedTableParams childDistributedTableParams = {
|
||||
.colocateWithTableName = parentRelationName,
|
||||
.shardCount = distributedTableParams->shardCount,
|
||||
.shardCountIsStrict = false,
|
||||
.distributionColumnName = distributedTableParams->distributionColumnName,
|
||||
};
|
||||
CreateCitusTable(partitionRelationId, tableType,
|
||||
&childDistributedTableParams);
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
MemoryContextDelete(citusPartitionContext);
|
||||
}
|
||||
|
||||
/* copy over data for hash distributed and reference tables */
|
||||
if (distributionMethod == DISTRIBUTE_BY_HASH ||
|
||||
distributionMethod == DISTRIBUTE_BY_NONE)
|
||||
if (tableType == HASH_DISTRIBUTED || tableType == REFERENCE_TABLE)
|
||||
{
|
||||
if (RegularTable(relationId))
|
||||
{
|
||||
|
@ -1145,6 +1260,70 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* DecideCitusTableParams decides CitusTableParams based on given CitusTableType
|
||||
* and DistributedTableParams if it's a distributed table.
|
||||
*
|
||||
* DistributedTableParams should be non-null only if CitusTableType corresponds
|
||||
* to a distributed table.
|
||||
*/
|
||||
static
|
||||
CitusTableParams
|
||||
DecideCitusTableParams(CitusTableType tableType,
|
||||
DistributedTableParams *distributedTableParams)
|
||||
{
|
||||
CitusTableParams citusTableParams = { 0 };
|
||||
switch (tableType)
|
||||
{
|
||||
case HASH_DISTRIBUTED:
|
||||
{
|
||||
citusTableParams.distributionMethod = DISTRIBUTE_BY_HASH;
|
||||
citusTableParams.replicationModel =
|
||||
DecideDistTableReplicationModel(DISTRIBUTE_BY_HASH,
|
||||
distributedTableParams->
|
||||
colocateWithTableName);
|
||||
break;
|
||||
}
|
||||
|
||||
case APPEND_DISTRIBUTED:
|
||||
{
|
||||
citusTableParams.distributionMethod = DISTRIBUTE_BY_APPEND;
|
||||
citusTableParams.replicationModel =
|
||||
DecideDistTableReplicationModel(APPEND_DISTRIBUTED,
|
||||
distributedTableParams->
|
||||
colocateWithTableName);
|
||||
break;
|
||||
}
|
||||
|
||||
case RANGE_DISTRIBUTED:
|
||||
{
|
||||
citusTableParams.distributionMethod = DISTRIBUTE_BY_RANGE;
|
||||
citusTableParams.replicationModel =
|
||||
DecideDistTableReplicationModel(RANGE_DISTRIBUTED,
|
||||
distributedTableParams->
|
||||
colocateWithTableName);
|
||||
break;
|
||||
}
|
||||
|
||||
case REFERENCE_TABLE:
|
||||
{
|
||||
citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE;
|
||||
citusTableParams.replicationModel = REPLICATION_MODEL_2PC;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
ereport(ERROR, (errmsg("unexpected table type when deciding Citus "
|
||||
"table params")));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return citusTableParams;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PropagatePrerequisiteObjectsForDistributedTable ensures we can create shards
|
||||
* on all nodes by ensuring all dependent objects exist on all nodes.
|
||||
|
@ -1190,7 +1369,7 @@ EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId, Oid ownerRelationId
|
|||
foreach_oid(citusTableId, citusTableIdList)
|
||||
{
|
||||
List *seqInfoList = NIL;
|
||||
GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0);
|
||||
GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, DEPENDENCY_AUTO);
|
||||
|
||||
SequenceInfo *seqInfo = NULL;
|
||||
foreach_ptr(seqInfo, seqInfoList)
|
||||
|
@ -1267,7 +1446,7 @@ EnsureRelationHasCompatibleSequenceTypes(Oid relationId)
|
|||
{
|
||||
List *seqInfoList = NIL;
|
||||
|
||||
GetDependentSequencesWithRelation(relationId, &seqInfoList, 0);
|
||||
GetDependentSequencesWithRelation(relationId, &seqInfoList, 0, DEPENDENCY_AUTO);
|
||||
EnsureDistributedSequencesHaveOneType(relationId, seqInfoList);
|
||||
}
|
||||
|
||||
|
@ -1405,17 +1584,15 @@ DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
|
|||
|
||||
|
||||
/*
|
||||
* DecideReplicationModel function decides which replication model should be
|
||||
* used depending on given distribution configuration.
|
||||
* DecideDistTableReplicationModel function decides which replication model should be
|
||||
* used for a distributed table depending on given distribution configuration.
|
||||
*/
|
||||
static char
|
||||
DecideReplicationModel(char distributionMethod, char *colocateWithTableName)
|
||||
DecideDistTableReplicationModel(char distributionMethod, char *colocateWithTableName)
|
||||
{
|
||||
if (distributionMethod == DISTRIBUTE_BY_NONE)
|
||||
{
|
||||
return REPLICATION_MODEL_2PC;
|
||||
}
|
||||
else if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 &&
|
||||
Assert(distributionMethod != DISTRIBUTE_BY_NONE);
|
||||
|
||||
if (!IsColocateWithDefault(colocateWithTableName) &&
|
||||
!IsColocateWithNone(colocateWithTableName))
|
||||
{
|
||||
text *colocateWithTableNameText = cstring_to_text(colocateWithTableName);
|
||||
|
@ -1491,28 +1668,34 @@ CreateHashDistributedTableShards(Oid relationId, int shardCount,
|
|||
|
||||
|
||||
/*
|
||||
* ColocationIdForNewTable returns a colocation id for hash-distributed table
|
||||
* ColocationIdForNewTable returns a colocation id for given table
|
||||
* according to given configuration. If there is no such configuration, it
|
||||
* creates one and returns the colocation id of the newly created colocation group.
|
||||
* Note that DistributedTableParams and the distribution column Var should be
|
||||
* non-null only if CitusTableType corresponds to a distributed table.
|
||||
*
|
||||
* For append and range distributed tables, this function errors out if
|
||||
* colocateWithTableName parameter is not NULL, otherwise directly returns
|
||||
* INVALID_COLOCATION_ID.
|
||||
*
|
||||
* For reference tables, returns the common reference table colocation id.
|
||||
*
|
||||
* This function assumes its caller takes the necessary lock on relationId to
|
||||
* prevent possible changes on it.
|
||||
*/
|
||||
static uint32
|
||||
ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
|
||||
char distributionMethod, char replicationModel,
|
||||
int shardCount, bool shardCountIsStrict,
|
||||
char *colocateWithTableName)
|
||||
ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
|
||||
DistributedTableParams *distributedTableParams,
|
||||
Var *distributionColumn)
|
||||
{
|
||||
CitusTableParams citusTableParams = DecideCitusTableParams(tableType,
|
||||
distributedTableParams);
|
||||
|
||||
uint32 colocationId = INVALID_COLOCATION_ID;
|
||||
|
||||
if (distributionMethod == DISTRIBUTE_BY_APPEND ||
|
||||
distributionMethod == DISTRIBUTE_BY_RANGE)
|
||||
if (tableType == APPEND_DISTRIBUTED || tableType == RANGE_DISTRIBUTED)
|
||||
{
|
||||
if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0)
|
||||
if (!IsColocateWithDefault(distributedTableParams->colocateWithTableName))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot distribute relation"),
|
||||
|
@ -1522,7 +1705,7 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
|
|||
|
||||
return colocationId;
|
||||
}
|
||||
else if (distributionMethod == DISTRIBUTE_BY_NONE)
|
||||
else if (tableType == REFERENCE_TABLE)
|
||||
{
|
||||
return CreateReferenceTableColocationId();
|
||||
}
|
||||
|
@ -1533,27 +1716,29 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
|
|||
* can be sure that there will be no modifications on the colocation table
|
||||
* until this transaction is committed.
|
||||
*/
|
||||
Assert(distributionMethod == DISTRIBUTE_BY_HASH);
|
||||
Assert(citusTableParams.distributionMethod == DISTRIBUTE_BY_HASH);
|
||||
|
||||
Oid distributionColumnType = distributionColumn->vartype;
|
||||
Oid distributionColumnCollation = get_typcollation(distributionColumnType);
|
||||
|
||||
/* get an advisory lock to serialize concurrent default group creations */
|
||||
if (IsColocateWithDefault(colocateWithTableName))
|
||||
if (IsColocateWithDefault(distributedTableParams->colocateWithTableName))
|
||||
{
|
||||
AcquireColocationDefaultLock();
|
||||
}
|
||||
|
||||
colocationId = FindColocateWithColocationId(relationId,
|
||||
replicationModel,
|
||||
citusTableParams.replicationModel,
|
||||
distributionColumnType,
|
||||
distributionColumnCollation,
|
||||
shardCount,
|
||||
distributedTableParams->shardCount,
|
||||
distributedTableParams->
|
||||
shardCountIsStrict,
|
||||
distributedTableParams->
|
||||
colocateWithTableName);
|
||||
|
||||
if (IsColocateWithDefault(colocateWithTableName) && (colocationId !=
|
||||
INVALID_COLOCATION_ID))
|
||||
if (IsColocateWithDefault(distributedTableParams->colocateWithTableName) &&
|
||||
(colocationId != INVALID_COLOCATION_ID))
|
||||
{
|
||||
/*
|
||||
* we can release advisory lock if there is already a default entry for given params;
|
||||
|
@ -1565,23 +1750,25 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
|
|||
|
||||
if (colocationId == INVALID_COLOCATION_ID)
|
||||
{
|
||||
if (IsColocateWithDefault(colocateWithTableName))
|
||||
if (IsColocateWithDefault(distributedTableParams->colocateWithTableName))
|
||||
{
|
||||
/*
|
||||
* Generate a new colocation ID and insert a pg_dist_colocation
|
||||
* record.
|
||||
*/
|
||||
colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor,
|
||||
colocationId = CreateColocationGroup(distributedTableParams->shardCount,
|
||||
ShardReplicationFactor,
|
||||
distributionColumnType,
|
||||
distributionColumnCollation);
|
||||
}
|
||||
else if (IsColocateWithNone(colocateWithTableName))
|
||||
else if (IsColocateWithNone(distributedTableParams->colocateWithTableName))
|
||||
{
|
||||
/*
|
||||
* Generate a new colocation ID and insert a pg_dist_colocation
|
||||
* record.
|
||||
*/
|
||||
colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor,
|
||||
colocationId = CreateColocationGroup(distributedTableParams->shardCount,
|
||||
ShardReplicationFactor,
|
||||
distributionColumnType,
|
||||
distributionColumnCollation);
|
||||
}
|
||||
|
@ -1608,6 +1795,8 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
|
|||
{
|
||||
Oid parentRelationId = InvalidOid;
|
||||
|
||||
ErrorIfTableHasUnsupportedIdentityColumn(relationId);
|
||||
|
||||
EnsureLocalTableEmptyIfNecessary(relationId, distributionMethod);
|
||||
|
||||
/* user really wants triggers? */
|
||||
|
@ -2219,12 +2408,12 @@ CopyLocalDataIntoShards(Oid distributedRelationId)
|
|||
EState *estate = CreateExecutorState();
|
||||
ExprContext *econtext = GetPerTupleExprContext(estate);
|
||||
econtext->ecxt_scantuple = slot;
|
||||
|
||||
const bool nonPublishableData = false;
|
||||
DestReceiver *copyDest =
|
||||
(DestReceiver *) CreateCitusCopyDestReceiver(distributedRelationId,
|
||||
columnNameList,
|
||||
partitionColumnIndex,
|
||||
estate, NULL);
|
||||
estate, NULL, nonPublishableData);
|
||||
|
||||
/* initialise state for writing to shards, we'll open connections on demand */
|
||||
copyDest->rStartup(copyDest, 0, tupleDescriptor);
|
||||
|
|
|
@ -29,16 +29,14 @@
|
|||
#include "storage/lmgr.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
typedef bool (*AddressPredicate)(const ObjectAddress *);
|
||||
|
||||
static void EnsureDependenciesCanBeDistributed(const ObjectAddress *relationAddress);
|
||||
static void ErrorIfCircularDependencyExists(const ObjectAddress *objectAddress);
|
||||
static int ObjectAddressComparator(const void *a, const void *b);
|
||||
static List * FilterObjectAddressListByPredicate(List *objectAddressList,
|
||||
AddressPredicate predicate);
|
||||
static void EnsureDependenciesExistOnAllNodes(const ObjectAddress *target);
|
||||
static List * GetDependencyCreateDDLCommands(const ObjectAddress *dependency);
|
||||
static bool ShouldPropagateObject(const ObjectAddress *address);
|
||||
static char * DropTableIfExistsCommand(Oid relationId);
|
||||
|
||||
/*
|
||||
* EnsureDependenciesExistOnAllNodes finds all the dependencies that we support and makes
|
||||
|
@ -325,6 +323,21 @@ GetDistributableDependenciesForObject(const ObjectAddress *target)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* DropTableIfExistsCommand returns a command to drop the given table if it exists.
|
||||
*/
|
||||
static char *
|
||||
DropTableIfExistsCommand(Oid relationId)
|
||||
{
|
||||
char *qualifiedRelationName = generate_qualified_relation_name(relationId);
|
||||
StringInfo dropTableCommand = makeStringInfo();
|
||||
appendStringInfo(dropTableCommand, "DROP TABLE IF EXISTS %s CASCADE",
|
||||
qualifiedRelationName);
|
||||
|
||||
return dropTableCommand->data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetDependencyCreateDDLCommands returns a list (potentially empty or NIL) of ddl
|
||||
* commands to execute on a worker to create the object.
|
||||
|
@ -370,7 +383,7 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
|
|||
bool creatingShellTableOnRemoteNode = true;
|
||||
List *tableDDLCommands = GetFullTableCreationCommands(relationId,
|
||||
WORKER_NEXTVAL_SEQUENCE_DEFAULTS,
|
||||
INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS,
|
||||
INCLUDE_IDENTITY,
|
||||
creatingShellTableOnRemoteNode);
|
||||
TableDDLCommand *tableDDLCommand = NULL;
|
||||
foreach_ptr(tableDDLCommand, tableDDLCommands)
|
||||
|
@ -379,6 +392,10 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
|
|||
commandList = lappend(commandList, GetTableDDLCommand(
|
||||
tableDDLCommand));
|
||||
}
|
||||
|
||||
/* we need to drop table, if exists, first to make table creation idempotent */
|
||||
commandList = lcons(DropTableIfExistsCommand(relationId),
|
||||
commandList);
|
||||
}
|
||||
|
||||
return commandList;
|
||||
|
@ -438,6 +455,11 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
|
|||
return DDLCommands;
|
||||
}
|
||||
|
||||
case OCLASS_PUBLICATION:
|
||||
{
|
||||
return CreatePublicationDDLCommandsIdempotent(dependency);
|
||||
}
|
||||
|
||||
case OCLASS_ROLE:
|
||||
{
|
||||
return GenerateCreateOrAlterRoleCommand(dependency->objectId);
|
||||
|
@ -527,68 +549,6 @@ GetAllDependencyCreateDDLCommands(const List *dependencies)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReplicateAllObjectsToNodeCommandList returns commands to replicate all
|
||||
* previously marked objects to a worker node. The function also sets
|
||||
* clusterHasDistributedFunction if there are any distributed functions.
|
||||
*/
|
||||
List *
|
||||
ReplicateAllObjectsToNodeCommandList(const char *nodeName, int nodePort)
|
||||
{
|
||||
/* since we are executing ddl commands disable propagation first, primarily for mx */
|
||||
List *ddlCommands = list_make1(DISABLE_DDL_PROPAGATION);
|
||||
|
||||
/*
|
||||
* collect all dependencies in creation order and get their ddl commands
|
||||
*/
|
||||
List *dependencies = GetDistributedObjectAddressList();
|
||||
|
||||
/*
|
||||
* Depending on changes in the environment, such as the enable_metadata_sync guc
|
||||
* there might be objects in the distributed object address list that should currently
|
||||
* not be propagated by citus as they are 'not supported'.
|
||||
*/
|
||||
dependencies = FilterObjectAddressListByPredicate(dependencies,
|
||||
&SupportedDependencyByCitus);
|
||||
|
||||
/*
|
||||
* When dependency lists are getting longer we see a delay in the creation time on the
|
||||
* workers. We would like to inform the user. Currently we warn for lists greater than
|
||||
* 100 items, where 100 is an arbitrarily chosen number. If we find it too high or too
|
||||
* low we can adjust this based on experience.
|
||||
*/
|
||||
if (list_length(dependencies) > 100)
|
||||
{
|
||||
ereport(NOTICE, (errmsg("Replicating postgres objects to node %s:%d", nodeName,
|
||||
nodePort),
|
||||
errdetail("There are %d objects to replicate, depending on your "
|
||||
"environment this might take a while",
|
||||
list_length(dependencies))));
|
||||
}
|
||||
|
||||
dependencies = OrderObjectAddressListInDependencyOrder(dependencies);
|
||||
ObjectAddress *dependency = NULL;
|
||||
foreach_ptr(dependency, dependencies)
|
||||
{
|
||||
if (IsAnyObjectAddressOwnedByExtension(list_make1(dependency), NULL))
|
||||
{
|
||||
/*
|
||||
* we expect extension-owned objects to be created as a result
|
||||
* of the extension being created.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
ddlCommands = list_concat(ddlCommands,
|
||||
GetDependencyCreateDDLCommands(dependency));
|
||||
}
|
||||
|
||||
ddlCommands = lappend(ddlCommands, ENABLE_DDL_PROPAGATION);
|
||||
|
||||
return ddlCommands;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ShouldPropagate determines if we should be propagating anything
|
||||
*/
|
||||
|
@ -744,7 +704,7 @@ ShouldPropagateAnyObject(List *addresses)
|
|||
* FilterObjectAddressListByPredicate takes a list of ObjectAddress *'s and returns a list
|
||||
* only containing the ObjectAddress *'s for which the predicate returned true.
|
||||
*/
|
||||
static List *
|
||||
List *
|
||||
FilterObjectAddressListByPredicate(List *objectAddressList, AddressPredicate predicate)
|
||||
{
|
||||
List *result = NIL;
|
||||
|
|
|
@ -245,6 +245,15 @@ static DistributeObjectOps Any_CreatePolicy = {
|
|||
.address = NULL,
|
||||
.markDistributed = false,
|
||||
};
|
||||
static DistributeObjectOps Any_CreatePublication = {
|
||||
.deparse = DeparseCreatePublicationStmt,
|
||||
.qualify = QualifyCreatePublicationStmt,
|
||||
.preprocess = NULL,
|
||||
.postprocess = PostProcessCreatePublicationStmt,
|
||||
.operationType = DIST_OPS_CREATE,
|
||||
.address = CreatePublicationStmtObjectAddress,
|
||||
.markDistributed = true,
|
||||
};
|
||||
static DistributeObjectOps Any_CreateRole = {
|
||||
.deparse = DeparseCreateRoleStmt,
|
||||
.qualify = NULL,
|
||||
|
@ -707,6 +716,45 @@ static DistributeObjectOps Procedure_Rename = {
|
|||
.address = RenameFunctionStmtObjectAddress,
|
||||
.markDistributed = false,
|
||||
};
|
||||
static DistributeObjectOps Publication_Alter = {
|
||||
.deparse = DeparseAlterPublicationStmt,
|
||||
.qualify = QualifyAlterPublicationStmt,
|
||||
.preprocess = PreprocessAlterPublicationStmt,
|
||||
.postprocess = PostprocessAlterDistributedObjectStmt,
|
||||
.objectType = OBJECT_PUBLICATION,
|
||||
.operationType = DIST_OPS_ALTER,
|
||||
.address = AlterPublicationStmtObjectAddress,
|
||||
.markDistributed = false,
|
||||
};
|
||||
static DistributeObjectOps Publication_AlterOwner = {
|
||||
.deparse = DeparseAlterPublicationOwnerStmt,
|
||||
.qualify = NULL,
|
||||
.preprocess = PreprocessAlterDistributedObjectStmt,
|
||||
.postprocess = PostprocessAlterDistributedObjectStmt,
|
||||
.objectType = OBJECT_PUBLICATION,
|
||||
.operationType = DIST_OPS_ALTER,
|
||||
.address = AlterPublicationOwnerStmtObjectAddress,
|
||||
.markDistributed = false,
|
||||
};
|
||||
static DistributeObjectOps Publication_Drop = {
|
||||
.deparse = DeparseDropPublicationStmt,
|
||||
.qualify = NULL,
|
||||
.preprocess = PreprocessDropDistributedObjectStmt,
|
||||
.postprocess = NULL,
|
||||
.operationType = DIST_OPS_DROP,
|
||||
.address = NULL,
|
||||
.markDistributed = false,
|
||||
};
|
||||
static DistributeObjectOps Publication_Rename = {
|
||||
.deparse = DeparseRenamePublicationStmt,
|
||||
.qualify = NULL,
|
||||
.preprocess = PreprocessAlterDistributedObjectStmt,
|
||||
.postprocess = NULL,
|
||||
.objectType = OBJECT_PUBLICATION,
|
||||
.operationType = DIST_OPS_ALTER,
|
||||
.address = RenamePublicationStmtObjectAddress,
|
||||
.markDistributed = false,
|
||||
};
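The four publication entries above follow the existing DistributeObjectOps convention: one statically initialized struct per statement kind, looked up through GetDistributeObjectOps. As a sketch, supporting ALTER for some new object kind would mean adding an entry like the following, where every Widget* name and OBJECT_WIDGET are placeholders rather than real PostgreSQL or Citus symbols:

/* Sketch only: a DistributeObjectOps entry for a hypothetical object kind. */
static DistributeObjectOps Widget_Alter = {
	.deparse = DeparseAlterWidgetStmt,            /* hypothetical deparser */
	.qualify = QualifyAlterWidgetStmt,            /* hypothetical qualifier */
	.preprocess = PreprocessAlterDistributedObjectStmt,
	.postprocess = PostprocessAlterDistributedObjectStmt,
	.objectType = OBJECT_WIDGET,                  /* hypothetical object type */
	.operationType = DIST_OPS_ALTER,
	.address = AlterWidgetStmtObjectAddress,      /* hypothetical address lookup */
	.markDistributed = false,
};

GetDistributeObjectOps would then gain a matching case that returns &Widget_Alter, mirroring the T_AlterPublicationStmt case added further down.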
|
||||
static DistributeObjectOps Routine_AlterObjectDepends = {
|
||||
.deparse = DeparseAlterFunctionDependsStmt,
|
||||
.qualify = QualifyAlterFunctionDependsStmt,
|
||||
|
@ -1399,6 +1447,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Procedure_AlterOwner;
|
||||
}
|
||||
|
||||
case OBJECT_PUBLICATION:
|
||||
{
|
||||
return &Publication_AlterOwner;
|
||||
}
|
||||
|
||||
case OBJECT_ROUTINE:
|
||||
{
|
||||
return &Routine_AlterOwner;
|
||||
|
@ -1436,6 +1489,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Any_AlterPolicy;
|
||||
}
|
||||
|
||||
case T_AlterPublicationStmt:
|
||||
{
|
||||
return &Publication_Alter;
|
||||
}
|
||||
|
||||
case T_AlterRoleStmt:
|
||||
{
|
||||
return &Any_AlterRole;
|
||||
|
@ -1610,6 +1668,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Any_CreatePolicy;
|
||||
}
|
||||
|
||||
case T_CreatePublicationStmt:
|
||||
{
|
||||
return &Any_CreatePublication;
|
||||
}
|
||||
|
||||
case T_CreateRoleStmt:
|
||||
{
|
||||
return &Any_CreateRole;
|
||||
|
@ -1722,6 +1785,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Procedure_Drop;
|
||||
}
|
||||
|
||||
case OBJECT_PUBLICATION:
|
||||
{
|
||||
return &Publication_Drop;
|
||||
}
|
||||
|
||||
case OBJECT_ROUTINE:
|
||||
{
|
||||
return &Routine_Drop;
|
||||
|
@ -1901,6 +1969,11 @@ GetDistributeObjectOps(Node *node)
|
|||
return &Procedure_Rename;
|
||||
}
|
||||
|
||||
case OBJECT_PUBLICATION:
|
||||
{
|
||||
return &Publication_Rename;
|
||||
}
|
||||
|
||||
case OBJECT_ROUTINE:
|
||||
{
|
||||
return &Routine_Rename;
|
||||
|
|
|
@ -221,7 +221,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
|
|||
if (!referencedIsCitus && !selfReferencingTable)
|
||||
{
|
||||
if (IsCitusLocalTableByDistParams(referencingDistMethod,
|
||||
referencingReplicationModel))
|
||||
referencingReplicationModel,
|
||||
referencingColocationId))
|
||||
{
|
||||
ErrorOutForFKeyBetweenPostgresAndCitusLocalTable(referencedTableId);
|
||||
}
|
||||
|
@ -245,8 +246,7 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
|
|||
if (!selfReferencingTable)
|
||||
{
|
||||
referencedDistMethod = PartitionMethod(referencedTableId);
|
||||
referencedDistKey = IsCitusTableType(referencedTableId,
|
||||
CITUS_TABLE_WITH_NO_DIST_KEY) ?
|
||||
referencedDistKey = !HasDistributionKey(referencedTableId) ?
|
||||
NULL :
|
||||
DistPartitionKey(referencedTableId);
|
||||
referencedColocationId = TableColocationId(referencedTableId);
|
||||
|
@ -278,9 +278,17 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
|
|||
}
|
||||
|
||||
bool referencingIsCitusLocalOrRefTable =
|
||||
(referencingDistMethod == DISTRIBUTE_BY_NONE);
|
||||
IsCitusLocalTableByDistParams(referencingDistMethod,
|
||||
referencingReplicationModel,
|
||||
referencingColocationId) ||
|
||||
IsReferenceTableByDistParams(referencingDistMethod,
|
||||
referencingReplicationModel);
|
||||
bool referencedIsCitusLocalOrRefTable =
|
||||
(referencedDistMethod == DISTRIBUTE_BY_NONE);
|
||||
IsCitusLocalTableByDistParams(referencedDistMethod,
|
||||
referencedReplicationModel,
|
||||
referencedColocationId) ||
|
||||
IsReferenceTableByDistParams(referencedDistMethod,
|
||||
referencedReplicationModel);
|
||||
if (referencingIsCitusLocalOrRefTable && referencedIsCitusLocalOrRefTable)
|
||||
{
|
||||
EnsureSupportedFKeyBetweenCitusLocalAndRefTable(constraintForm,
|
||||
|
@ -313,7 +321,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
|
|||
* reference table is referenced.
|
||||
*/
|
||||
bool referencedIsReferenceTable =
|
||||
(referencedReplicationModel == REPLICATION_MODEL_2PC);
|
||||
IsReferenceTableByDistParams(referencedDistMethod,
|
||||
referencedReplicationModel);
|
||||
if (!referencedIsReferenceTable && (
|
||||
referencingColocationId == INVALID_COLOCATION_ID ||
|
||||
referencingColocationId != referencedColocationId))
|
||||
|
|
|
@ -1190,7 +1190,7 @@ ErrorIfUnsupportedIndexStmt(IndexStmt *createIndexStatement)
|
|||
* Non-distributed tables do not have partition key, and unique constraints
|
||||
* are allowed for them. Thus, we added a short-circuit for non-distributed tables.
|
||||
*/
|
||||
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKey(relationId))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "distributed/local_multi_copy.h"
|
||||
#include "distributed/shard_utils.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "distributed/replication_origin_session_utils.h"
|
||||
|
||||
/* managed via GUC, default is 512 kB */
|
||||
int LocalCopyFlushThresholdByte = 512 * 1024;
|
||||
|
@ -46,7 +47,7 @@ static void AddSlotToBuffer(TupleTableSlot *slot, CitusCopyDestReceiver *copyDes
|
|||
static bool ShouldAddBinaryHeaders(StringInfo buffer, bool isBinary);
|
||||
static bool ShouldSendCopyNow(StringInfo buffer);
|
||||
static void DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId,
|
||||
CopyStmt *copyStatement, bool isEndOfCopy);
|
||||
CopyStmt *copyStatement, bool isEndOfCopy, bool isPublishable);
|
||||
static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead);
|
||||
|
||||
|
||||
|
@ -94,7 +95,7 @@ WriteTupleToLocalShard(TupleTableSlot *slot, CitusCopyDestReceiver *copyDest, in
|
|||
bool isEndOfCopy = false;
|
||||
DoLocalCopy(localCopyOutState->fe_msgbuf, copyDest->distributedRelationId,
|
||||
shardId,
|
||||
copyDest->copyStatement, isEndOfCopy);
|
||||
copyDest->copyStatement, isEndOfCopy, copyDest->isPublishable);
|
||||
resetStringInfo(localCopyOutState->fe_msgbuf);
|
||||
}
|
||||
}
|
||||
|
@ -133,7 +134,7 @@ FinishLocalCopyToShard(CitusCopyDestReceiver *copyDest, int64 shardId,
|
|||
}
|
||||
bool isEndOfCopy = true;
|
||||
DoLocalCopy(localCopyOutState->fe_msgbuf, copyDest->distributedRelationId, shardId,
|
||||
copyDest->copyStatement, isEndOfCopy);
|
||||
copyDest->copyStatement, isEndOfCopy, copyDest->isPublishable);
|
||||
}
|
||||
|
||||
|
||||
|
@ -197,7 +198,7 @@ ShouldSendCopyNow(StringInfo buffer)
|
|||
*/
|
||||
static void
|
||||
DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStatement,
|
||||
bool isEndOfCopy)
|
||||
bool isEndOfCopy, bool isPublishable)
|
||||
{
|
||||
/*
|
||||
* Set the buffer as a global variable to allow ReadFromLocalBufferCallback
|
||||
|
@ -205,6 +206,10 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat
|
|||
* ReadFromLocalBufferCallback.
|
||||
*/
|
||||
LocalCopyBuffer = buffer;
|
||||
if (!isPublishable)
|
||||
{
|
||||
SetupReplicationOriginLocalSession();
|
||||
}
|
||||
|
||||
Oid shardOid = GetTableLocalShardOid(relationId, shardId);
|
||||
Relation shard = table_open(shardOid, RowExclusiveLock);
|
||||
|
@ -219,6 +224,10 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat
|
|||
EndCopyFrom(cstate);
|
||||
|
||||
table_close(shard, NoLock);
|
||||
if (!isPublishable)
|
||||
{
|
||||
ResetReplicationOriginLocalSession();
|
||||
}
|
||||
free_parsestate(pState);
|
||||
}
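The same conditional bracketing appears in both the local and the remote copy paths: when the copied rows must not reach logical replication subscribers, a replication origin session is set up before the write and reset afterwards. Stripped to its core (the actual COPY body is elided here), the local path looks like the following sketch:

/* Sketch: hide non-publishable local writes behind a replication origin session. */
if (!isPublishable)
{
	SetupReplicationOriginLocalSession();
}

/* ... open the shard relation and run the local COPY here ... */

if (!isPublishable)
{
	ResetReplicationOriginLocalSession();
}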
|
||||
|
||||
|
|
|
@ -85,6 +85,7 @@
|
|||
#include "distributed/relation_access_tracking.h"
|
||||
#include "distributed/remote_commands.h"
|
||||
#include "distributed/remote_transaction.h"
|
||||
#include "distributed/replication_origin_session_utils.h"
|
||||
#include "distributed/resource_lock.h"
|
||||
#include "distributed/shard_pruning.h"
|
||||
#include "distributed/shared_connection_stats.h"
|
||||
|
@ -270,7 +271,8 @@ static CopyConnectionState * GetConnectionState(HTAB *connectionStateHash,
|
|||
static CopyShardState * GetShardState(uint64 shardId, HTAB *shardStateHash,
|
||||
HTAB *connectionStateHash,
|
||||
bool *found, bool shouldUseLocalCopy, CopyOutState
|
||||
copyOutState, bool isColocatedIntermediateResult);
|
||||
copyOutState, bool isColocatedIntermediateResult,
|
||||
bool isPublishable);
|
||||
static MultiConnection * CopyGetPlacementConnection(HTAB *connectionStateHash,
|
||||
ShardPlacement *placement,
|
||||
bool colocatedIntermediateResult);
|
||||
|
@ -285,7 +287,8 @@ static void InitializeCopyShardState(CopyShardState *shardState,
|
|||
uint64 shardId,
|
||||
bool canUseLocalCopy,
|
||||
CopyOutState copyOutState,
|
||||
bool colocatedIntermediateResult);
|
||||
bool colocatedIntermediateResult, bool
|
||||
isPublishable);
|
||||
static void StartPlacementStateCopyCommand(CopyPlacementState *placementState,
|
||||
CopyStmt *copyStatement,
|
||||
CopyOutState copyOutState);
|
||||
|
@ -393,7 +396,7 @@ CitusCopyFrom(CopyStmt *copyStatement, QueryCompletion *completionTag)
|
|||
if (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
|
||||
IsCitusTableTypeCacheEntry(cacheEntry, RANGE_DISTRIBUTED) ||
|
||||
IsCitusTableTypeCacheEntry(cacheEntry, APPEND_DISTRIBUTED) ||
|
||||
IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
!HasDistributionKeyCacheEntry(cacheEntry))
|
||||
{
|
||||
CopyToExistingShards(copyStatement, completionTag);
|
||||
}
|
||||
|
@ -492,9 +495,11 @@ CopyToExistingShards(CopyStmt *copyStatement, QueryCompletion *completionTag)
|
|||
ExprContext *executorExpressionContext = GetPerTupleExprContext(executorState);
|
||||
|
||||
/* set up the destination for the COPY */
|
||||
const bool publishableData = true;
|
||||
CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(tableId, columnNameList,
|
||||
partitionColumnIndex,
|
||||
executorState, NULL);
|
||||
executorState, NULL,
|
||||
publishableData);
|
||||
|
||||
/* if the user specified an explicit append-to_shard option, write to it */
|
||||
uint64 appendShardId = ProcessAppendToShardOption(tableId, copyStatement);
|
||||
|
@ -1934,7 +1939,7 @@ CopyFlushOutput(CopyOutState cstate, char *start, char *pointer)
|
|||
CitusCopyDestReceiver *
|
||||
CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColumnIndex,
|
||||
EState *executorState,
|
||||
char *intermediateResultIdPrefix)
|
||||
char *intermediateResultIdPrefix, bool isPublishable)
|
||||
{
|
||||
CitusCopyDestReceiver *copyDest = (CitusCopyDestReceiver *) palloc0(
|
||||
sizeof(CitusCopyDestReceiver));
|
||||
|
@ -1953,6 +1958,7 @@ CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColu
|
|||
copyDest->executorState = executorState;
|
||||
copyDest->colocatedIntermediateResultIdPrefix = intermediateResultIdPrefix;
|
||||
copyDest->memoryContext = CurrentMemoryContext;
|
||||
copyDest->isPublishable = isPublishable;
|
||||
|
||||
return copyDest;
|
||||
}
|
||||
|
@ -2318,7 +2324,9 @@ CitusSendTupleToPlacements(TupleTableSlot *slot, CitusCopyDestReceiver *copyDest
|
|||
&cachedShardStateFound,
|
||||
copyDest->shouldUseLocalCopy,
|
||||
copyDest->copyOutState,
|
||||
isColocatedIntermediateResult);
|
||||
isColocatedIntermediateResult,
|
||||
copyDest->isPublishable);
|
||||
|
||||
if (!cachedShardStateFound)
|
||||
{
|
||||
firstTupleInShard = true;
|
||||
|
@ -2751,6 +2759,11 @@ ShutdownCopyConnectionState(CopyConnectionState *connectionState,
|
|||
if (activePlacementState != NULL)
|
||||
{
|
||||
EndPlacementStateCopyCommand(activePlacementState, copyOutState);
|
||||
if (!copyDest->isPublishable)
|
||||
{
|
||||
ResetReplicationOriginRemoteSession(
|
||||
activePlacementState->connectionState->connection);
|
||||
}
|
||||
}
|
||||
|
||||
dlist_foreach(iter, &connectionState->bufferedPlacementList)
|
||||
|
@ -2764,6 +2777,10 @@ ShutdownCopyConnectionState(CopyConnectionState *connectionState,
|
|||
SendCopyDataToPlacement(placementState->data, shardId,
|
||||
connectionState->connection);
|
||||
EndPlacementStateCopyCommand(placementState, copyOutState);
|
||||
if (!copyDest->isPublishable)
|
||||
{
|
||||
ResetReplicationOriginRemoteSession(connectionState->connection);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3436,7 +3453,7 @@ static CopyShardState *
|
|||
GetShardState(uint64 shardId, HTAB *shardStateHash,
|
||||
HTAB *connectionStateHash, bool *found, bool
|
||||
shouldUseLocalCopy, CopyOutState copyOutState,
|
||||
bool isColocatedIntermediateResult)
|
||||
bool isColocatedIntermediateResult, bool isPublishable)
|
||||
{
|
||||
CopyShardState *shardState = (CopyShardState *) hash_search(shardStateHash, &shardId,
|
||||
HASH_ENTER, found);
|
||||
|
@ -3444,7 +3461,8 @@ GetShardState(uint64 shardId, HTAB *shardStateHash,
|
|||
{
|
||||
InitializeCopyShardState(shardState, connectionStateHash,
|
||||
shardId, shouldUseLocalCopy,
|
||||
copyOutState, isColocatedIntermediateResult);
|
||||
copyOutState, isColocatedIntermediateResult,
|
||||
isPublishable);
|
||||
}
|
||||
|
||||
return shardState;
|
||||
|
@ -3461,7 +3479,8 @@ InitializeCopyShardState(CopyShardState *shardState,
|
|||
HTAB *connectionStateHash, uint64 shardId,
|
||||
bool shouldUseLocalCopy,
|
||||
CopyOutState copyOutState,
|
||||
bool colocatedIntermediateResult)
|
||||
bool colocatedIntermediateResult,
|
||||
bool isPublishable)
|
||||
{
|
||||
ListCell *placementCell = NULL;
|
||||
int failedPlacementCount = 0;
|
||||
|
@ -3532,6 +3551,11 @@ InitializeCopyShardState(CopyShardState *shardState,
|
|||
RemoteTransactionBeginIfNecessary(connection);
|
||||
}
|
||||
|
||||
if (!isPublishable)
|
||||
{
|
||||
SetupReplicationOriginRemoteSession(connection);
|
||||
}
|
||||
|
||||
CopyPlacementState *placementState = palloc0(sizeof(CopyPlacementState));
|
||||
placementState->shardState = shardState;
|
||||
placementState->data = makeStringInfo();
|
||||
@ -0,0 +1,634 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* publication.c
|
||||
* Commands for creating publications
|
||||
*
|
||||
* Copyright (c) Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
#include "miscadmin.h"
|
||||
|
||||
#include "catalog/pg_publication.h"
|
||||
#include "catalog/pg_publication_rel.h"
|
||||
#include "distributed/commands.h"
|
||||
#include "distributed/deparser.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/metadata_utility.h"
|
||||
#include "distributed/metadata_sync.h"
|
||||
#include "distributed/metadata/distobject.h"
|
||||
#include "distributed/reference_table_utils.h"
|
||||
#include "distributed/worker_create_or_replace.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/parsenodes.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/syscache.h"
|
||||
|
||||
#include "pg_version_compat.h"
|
||||
|
||||
|
||||
static CreatePublicationStmt * BuildCreatePublicationStmt(Oid publicationId);
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
static PublicationObjSpec * BuildPublicationRelationObjSpec(Oid relationId,
|
||||
Oid publicationId,
|
||||
bool tableOnly);
|
||||
#endif
|
||||
static void AppendPublishOptionList(StringInfo str, List *strings);
|
||||
static char * AlterPublicationOwnerCommand(Oid publicationId);
|
||||
static bool ShouldPropagateCreatePublication(CreatePublicationStmt *stmt);
|
||||
static List * ObjectAddressForPublicationName(char *publicationName, bool missingOk);
|
||||
|
||||
|
||||
/*
|
||||
* PostProcessCreatePublicationStmt handles CREATE PUBLICATION statements
|
||||
* that contain distributed tables.
|
||||
*/
|
||||
List *
|
||||
PostProcessCreatePublicationStmt(Node *node, const char *queryString)
|
||||
{
|
||||
CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
|
||||
|
||||
if (!ShouldPropagateCreatePublication(stmt))
|
||||
{
|
||||
/* should not propagate right now */
|
||||
return NIL;
|
||||
}
|
||||
|
||||
/* call into CreatePublicationStmtObjectAddress */
|
||||
List *publicationAddresses = GetObjectAddressListFromParseTree(node, false, true);
|
||||
|
||||
/* the code-path only supports a single object */
|
||||
Assert(list_length(publicationAddresses) == 1);
|
||||
|
||||
if (IsAnyObjectAddressOwnedByExtension(publicationAddresses, NULL))
|
||||
{
|
||||
/* should not propagate publications owned by extensions */
|
||||
return NIL;
|
||||
}
|
||||
|
||||
EnsureAllObjectDependenciesExistOnAllNodes(publicationAddresses);
|
||||
|
||||
const ObjectAddress *pubAddress = linitial(publicationAddresses);
|
||||
|
||||
List *commands = NIL;
|
||||
commands = lappend(commands, DISABLE_DDL_PROPAGATION);
|
||||
commands = lappend(commands, CreatePublicationDDLCommand(pubAddress->objectId));
|
||||
commands = lappend(commands, ENABLE_DDL_PROPAGATION);
|
||||
|
||||
return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
|
||||
}
|
||||
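As a hedged usage sketch of the propagation above (table and publication names are made up): creating a publication that covers a distributed table on the coordinator is now replayed on the worker nodes via the command list built here.

    CREATE TABLE items (key bigint, value text);
    SELECT create_distributed_table('items', 'key');
    CREATE PUBLICATION items_pub FOR TABLE items;
    -- the publication is now recreated on every worker node as well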
|
||||
|
||||
/*
|
||||
* CreatePublicationDDLCommandsIdempotent returns a list of DDL statements to be
|
||||
* executed on a node to recreate the publication addressed by the publicationAddress.
|
||||
*/
|
||||
List *
|
||||
CreatePublicationDDLCommandsIdempotent(const ObjectAddress *publicationAddress)
|
||||
{
|
||||
Assert(publicationAddress->classId == PublicationRelationId);
|
||||
|
||||
char *ddlCommand =
|
||||
CreatePublicationDDLCommand(publicationAddress->objectId);
|
||||
|
||||
char *alterPublicationOwnerSQL =
|
||||
AlterPublicationOwnerCommand(publicationAddress->objectId);
|
||||
|
||||
return list_make2(
|
||||
WrapCreateOrReplace(ddlCommand),
|
||||
alterPublicationOwnerSQL);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreatePublicationDDLCommand returns the CREATE PUBLICATION string that
|
||||
* can be used to recreate a given publication.
|
||||
*/
|
||||
char *
|
||||
CreatePublicationDDLCommand(Oid publicationId)
|
||||
{
|
||||
CreatePublicationStmt *createPubStmt = BuildCreatePublicationStmt(publicationId);
|
||||
|
||||
/* we took the WHERE clause from the catalog where it is already transformed */
|
||||
bool whereClauseRequiresTransform = false;
|
||||
|
||||
/* only propagate Citus tables in publication */
|
||||
bool includeLocalTables = false;
|
||||
|
||||
return DeparseCreatePublicationStmtExtended((Node *) createPubStmt,
|
||||
whereClauseRequiresTransform,
|
||||
includeLocalTables);
|
||||
}
|
||||
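For illustration, the recreated command returned here looks roughly like the following on PostgreSQL 15+ (publication, schema, and table names are hypothetical; exact formatting comes from the deparser):

    CREATE PUBLICATION events_pub FOR TABLES IN SCHEMA analytics, TABLE public.events
      WITH (publish_via_partition_root = 'false', publish = 'insert, update, delete, truncate');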
|
||||
|
||||
/*
|
||||
* BuildCreatePublicationStmt constructs a CreatePublicationStmt struct for the
|
||||
* given publication.
|
||||
*/
|
||||
static CreatePublicationStmt *
|
||||
BuildCreatePublicationStmt(Oid publicationId)
|
||||
{
|
||||
CreatePublicationStmt *createPubStmt = makeNode(CreatePublicationStmt);
|
||||
|
||||
HeapTuple publicationTuple =
|
||||
SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(publicationId));
|
||||
|
||||
if (!HeapTupleIsValid(publicationTuple))
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot find publication with oid: %d", publicationId)));
|
||||
}
|
||||
|
||||
Form_pg_publication publicationForm =
|
||||
(Form_pg_publication) GETSTRUCT(publicationTuple);
|
||||
|
||||
/* CREATE PUBLICATION <name> */
|
||||
createPubStmt->pubname = pstrdup(NameStr(publicationForm->pubname));
|
||||
|
||||
/* FOR ALL TABLES */
|
||||
createPubStmt->for_all_tables = publicationForm->puballtables;
|
||||
|
||||
ReleaseSysCache(publicationTuple);
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
List *schemaIds = GetPublicationSchemas(publicationId);
|
||||
Oid schemaId = InvalidOid;
|
||||
|
||||
foreach_oid(schemaId, schemaIds)
|
||||
{
|
||||
char *schemaName = get_namespace_name(schemaId);
|
||||
|
||||
PublicationObjSpec *publicationObject = makeNode(PublicationObjSpec);
|
||||
publicationObject->pubobjtype = PUBLICATIONOBJ_TABLES_IN_SCHEMA;
|
||||
publicationObject->pubtable = NULL;
|
||||
publicationObject->name = schemaName;
|
||||
publicationObject->location = -1;
|
||||
|
||||
createPubStmt->pubobjects = lappend(createPubStmt->pubobjects, publicationObject);
|
||||
}
|
||||
#endif
|
||||
|
||||
List *relationIds = GetPublicationRelations(publicationId,
|
||||
publicationForm->pubviaroot ?
|
||||
PUBLICATION_PART_ROOT :
|
||||
PUBLICATION_PART_LEAF);
|
||||
Oid relationId = InvalidOid;
|
||||
int citusTableCount PG_USED_FOR_ASSERTS_ONLY = 0;
|
||||
|
||||
/* mainly for consistent ordering in test output */
|
||||
relationIds = SortList(relationIds, CompareOids);
|
||||
|
||||
foreach_oid(relationId, relationIds)
|
||||
{
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
bool tableOnly = false;
|
||||
|
||||
/* since postgres 15, tables can have a column list and filter */
|
||||
PublicationObjSpec *publicationObject =
|
||||
BuildPublicationRelationObjSpec(relationId, publicationId, tableOnly);
|
||||
|
||||
createPubStmt->pubobjects = lappend(createPubStmt->pubobjects, publicationObject);
|
||||
#else
|
||||
|
||||
/* before postgres 15, only full tables are supported */
|
||||
char *schemaName = get_namespace_name(get_rel_namespace(relationId));
|
||||
char *tableName = get_rel_name(relationId);
|
||||
RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
|
||||
|
||||
createPubStmt->tables = lappend(createPubStmt->tables, rangeVar);
|
||||
#endif
|
||||
|
||||
if (IsCitusTable(relationId))
|
||||
{
|
||||
citusTableCount++;
|
||||
}
|
||||
}
|
||||
|
||||
/* WITH (publish_via_partition_root = true) option */
|
||||
bool publishViaRoot = publicationForm->pubviaroot;
|
||||
char *publishViaRootString = publishViaRoot ? "true" : "false";
|
||||
DefElem *pubViaRootOption = makeDefElem("publish_via_partition_root",
|
||||
(Node *) makeString(publishViaRootString),
|
||||
-1);
|
||||
createPubStmt->options = lappend(createPubStmt->options, pubViaRootOption);
|
||||
|
||||
/* WITH (publish = 'insert, update, delete, truncate') option */
|
||||
List *publishList = NIL;
|
||||
|
||||
if (publicationForm->pubinsert)
|
||||
{
|
||||
publishList = lappend(publishList, makeString("insert"));
|
||||
}
|
||||
|
||||
if (publicationForm->pubupdate)
|
||||
{
|
||||
publishList = lappend(publishList, makeString("update"));
|
||||
}
|
||||
|
||||
if (publicationForm->pubdelete)
|
||||
{
|
||||
publishList = lappend(publishList, makeString("delete"));
|
||||
}
|
||||
|
||||
if (publicationForm->pubtruncate)
|
||||
{
|
||||
publishList = lappend(publishList, makeString("truncate"));
|
||||
}
|
||||
|
||||
if (list_length(publishList) > 0)
|
||||
{
|
||||
StringInfo optionValue = makeStringInfo();
|
||||
AppendPublishOptionList(optionValue, publishList);
|
||||
|
||||
DefElem *publishOption = makeDefElem("publish",
|
||||
(Node *) makeString(optionValue->data), -1);
|
||||
createPubStmt->options = lappend(createPubStmt->options, publishOption);
|
||||
}
|
||||
|
||||
|
||||
return createPubStmt;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendPublishOptionList appends a list of publication options in
* comma-separated form.
|
||||
*/
|
||||
static void
|
||||
AppendPublishOptionList(StringInfo str, List *options)
|
||||
{
|
||||
ListCell *stringCell = NULL;
|
||||
foreach(stringCell, options)
|
||||
{
|
||||
const char *string = strVal(lfirst(stringCell));
|
||||
if (stringCell != list_head(options))
|
||||
{
|
||||
appendStringInfoString(str, ", ");
|
||||
}
|
||||
|
||||
/* we cannot escape these strings */
|
||||
appendStringInfoString(str, string);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
|
||||
/*
|
||||
* BuildPublicationRelationObjSpec returns a PublicationObjSpec that
|
||||
* can be included in a CREATE or ALTER PUBLICATION statement.
|
||||
*/
|
||||
static PublicationObjSpec *
|
||||
BuildPublicationRelationObjSpec(Oid relationId, Oid publicationId,
|
||||
bool tableOnly)
|
||||
{
|
||||
HeapTuple pubRelationTuple = SearchSysCache2(PUBLICATIONRELMAP,
|
||||
ObjectIdGetDatum(relationId),
|
||||
ObjectIdGetDatum(publicationId));
|
||||
if (!HeapTupleIsValid(pubRelationTuple))
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot find relation with oid %d in publication "
|
||||
"with oid %d", relationId, publicationId)));
|
||||
}
|
||||
|
||||
List *columnNameList = NIL;
|
||||
Node *whereClause = NULL;
|
||||
|
||||
/* build the column list */
|
||||
if (!tableOnly)
|
||||
{
|
||||
bool isNull = false;
|
||||
Datum attributesDatum = SysCacheGetAttr(PUBLICATIONRELMAP, pubRelationTuple,
|
||||
Anum_pg_publication_rel_prattrs,
|
||||
&isNull);
|
||||
if (!isNull)
|
||||
{
|
||||
ArrayType *attributesArray = DatumGetArrayTypeP(attributesDatum);
|
||||
int attributeCount = ARR_DIMS(attributesArray)[0];
|
||||
int16 *elems = (int16 *) ARR_DATA_PTR(attributesArray);
|
||||
|
||||
for (int attNumIndex = 0; attNumIndex < attributeCount; attNumIndex++)
|
||||
{
|
||||
AttrNumber attributeNumber = elems[attNumIndex];
|
||||
char *columnName = get_attname(relationId, attributeNumber, false);
|
||||
|
||||
columnNameList = lappend(columnNameList, makeString(columnName));
|
||||
}
|
||||
}
|
||||
|
||||
/* build the WHERE clause */
|
||||
Datum whereClauseDatum = SysCacheGetAttr(PUBLICATIONRELMAP, pubRelationTuple,
|
||||
Anum_pg_publication_rel_prqual,
|
||||
&isNull);
|
||||
if (!isNull)
|
||||
{
|
||||
/*
|
||||
* We use the already-transformed parse tree form here, which does
|
||||
* not match regular CreatePublicationStmt
|
||||
*/
|
||||
whereClause = stringToNode(TextDatumGetCString(whereClauseDatum));
|
||||
}
|
||||
}
|
||||
|
||||
ReleaseSysCache(pubRelationTuple);
|
||||
|
||||
char *schemaName = get_namespace_name(get_rel_namespace(relationId));
|
||||
char *tableName = get_rel_name(relationId);
|
||||
RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
|
||||
|
||||
/* build the FOR TABLE */
|
||||
PublicationTable *publicationTable =
|
||||
makeNode(PublicationTable);
|
||||
publicationTable->relation = rangeVar;
|
||||
publicationTable->whereClause = whereClause;
|
||||
publicationTable->columns = columnNameList;
|
||||
|
||||
PublicationObjSpec *publicationObject = makeNode(PublicationObjSpec);
|
||||
publicationObject->pubobjtype = PUBLICATIONOBJ_TABLE;
|
||||
publicationObject->pubtable = publicationTable;
|
||||
publicationObject->name = NULL;
|
||||
publicationObject->location = -1;
|
||||
|
||||
return publicationObject;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* PreprocessAlterPublicationStmt handles ALTER PUBLICATION statements
|
||||
* in a way that is mostly similar to PreprocessAlterDistributedObjectStmt,
|
||||
* except we do not ensure sequential mode (publications do not interact with
|
||||
* shards) and can handle NULL deparse commands for ALTER PUBLICATION commands
|
||||
* that only involve local tables.
|
||||
*/
|
||||
List *
|
||||
PreprocessAlterPublicationStmt(Node *stmt, const char *queryString,
|
||||
ProcessUtilityContext processUtilityContext)
|
||||
{
|
||||
List *addresses = GetObjectAddressListFromParseTree(stmt, false, false);
|
||||
|
||||
/* the code-path only supports a single object */
|
||||
Assert(list_length(addresses) == 1);
|
||||
|
||||
if (!ShouldPropagateAnyObject(addresses))
|
||||
{
|
||||
return NIL;
|
||||
}
|
||||
|
||||
EnsureCoordinator();
|
||||
QualifyTreeNode(stmt);
|
||||
|
||||
const char *sql = DeparseTreeNode((Node *) stmt);
|
||||
if (sql == NULL)
|
||||
{
|
||||
/*
|
||||
* Deparsing logic decided that there is nothing to propagate, e.g.
|
||||
* because the command only concerns local tables.
|
||||
*/
|
||||
return NIL;
|
||||
}
|
||||
|
||||
List *commands = list_make3(DISABLE_DDL_PROPAGATION,
|
||||
(void *) sql,
|
||||
ENABLE_DDL_PROPAGATION);
|
||||
|
||||
return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetAlterPublicationDDLCommandsForTable gets a list of ALTER PUBLICATION .. ADD/DROP
|
||||
* commands for the given table.
|
||||
*
|
||||
* If isAdd is true, it returns ALTER PUBLICATION .. ADD TABLE commands for all
|
||||
* publications.
|
||||
*
|
||||
* Otherwise, it returns ALTER PUBLICATION .. DROP TABLE commands for all
|
||||
* publications.
|
||||
*/
|
||||
List *
|
||||
GetAlterPublicationDDLCommandsForTable(Oid relationId, bool isAdd)
|
||||
{
|
||||
List *commands = NIL;
|
||||
|
||||
List *publicationIds = GetRelationPublications(relationId);
|
||||
Oid publicationId = InvalidOid;
|
||||
|
||||
foreach_oid(publicationId, publicationIds)
|
||||
{
|
||||
char *command = GetAlterPublicationTableDDLCommand(publicationId,
|
||||
relationId, isAdd);
|
||||
|
||||
commands = lappend(commands, command);
|
||||
}
|
||||
|
||||
return commands;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetAlterPublicationTableDDLCommand generates an ALTER PUBLICATION .. ADD/DROP TABLE
* command for the given publication and relation ID.
*
* If isAdd is true, it returns an ALTER PUBLICATION .. ADD TABLE command.
* Otherwise, it returns an ALTER PUBLICATION .. DROP TABLE command.
|
||||
*/
|
||||
char *
|
||||
GetAlterPublicationTableDDLCommand(Oid publicationId, Oid relationId,
|
||||
bool isAdd)
|
||||
{
|
||||
HeapTuple pubTuple = SearchSysCache1(PUBLICATIONOID,
|
||||
ObjectIdGetDatum(publicationId));
|
||||
if (!HeapTupleIsValid(pubTuple))
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot find publication with oid: %d",
|
||||
publicationId)));
|
||||
}
|
||||
|
||||
Form_pg_publication pubForm = (Form_pg_publication) GETSTRUCT(pubTuple);
|
||||
|
||||
AlterPublicationStmt *alterPubStmt = makeNode(AlterPublicationStmt);
|
||||
alterPubStmt->pubname = pstrdup(NameStr(pubForm->pubname));
|
||||
|
||||
ReleaseSysCache(pubTuple);
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
bool tableOnly = !isAdd;
|
||||
|
||||
/* since postgres 15, tables can have a column list and filter */
|
||||
PublicationObjSpec *publicationObject =
|
||||
BuildPublicationRelationObjSpec(relationId, publicationId, tableOnly);
|
||||
|
||||
alterPubStmt->pubobjects = lappend(alterPubStmt->pubobjects, publicationObject);
|
||||
alterPubStmt->action = isAdd ? AP_AddObjects : AP_DropObjects;
|
||||
#else
|
||||
|
||||
/* before postgres 15, only full tables are supported */
|
||||
char *schemaName = get_namespace_name(get_rel_namespace(relationId));
|
||||
char *tableName = get_rel_name(relationId);
|
||||
RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
|
||||
|
||||
alterPubStmt->tables = lappend(alterPubStmt->tables, rangeVar);
|
||||
alterPubStmt->tableAction = isAdd ? DEFELEM_ADD : DEFELEM_DROP;
|
||||
#endif
|
||||
|
||||
/* we take the WHERE clause from the catalog where it is already transformed */
|
||||
bool whereClauseNeedsTransform = false;
|
||||
|
||||
/*
|
||||
* We use these commands to restore publications before/after transforming a
|
||||
* table, including transformations to/from local tables.
|
||||
*/
|
||||
bool includeLocalTables = true;
|
||||
|
||||
char *command = DeparseAlterPublicationStmtExtended((Node *) alterPubStmt,
|
||||
whereClauseNeedsTransform,
|
||||
includeLocalTables);
|
||||
|
||||
return command;
|
||||
}
|
||||
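These per-publication commands are what Citus replays around table transformations. A hedged example of the generated statements (publication, table, and column names are hypothetical; on PG15+ the column list and row filter are carried over on ADD):

    ALTER PUBLICATION events_pub DROP TABLE public.events;
    -- ... the table is transformed, e.g. converted to or from a Citus table ...
    ALTER PUBLICATION events_pub ADD TABLE public.events (event_id, payload) WHERE (region = 'eu');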
|
||||
|
||||
/*
|
||||
* AlterPublicationOwnerCommand returns "ALTER PUBLICATION .. OWNER TO .."
|
||||
* statement for the specified publication.
|
||||
*/
|
||||
static char *
|
||||
AlterPublicationOwnerCommand(Oid publicationId)
|
||||
{
|
||||
HeapTuple publicationTuple =
|
||||
SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(publicationId));
|
||||
|
||||
if (!HeapTupleIsValid(publicationTuple))
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot find publication with oid: %d",
|
||||
publicationId)));
|
||||
}
|
||||
|
||||
Form_pg_publication publicationForm =
|
||||
(Form_pg_publication) GETSTRUCT(publicationTuple);
|
||||
|
||||
char *publicationName = NameStr(publicationForm->pubname);
|
||||
Oid publicationOwnerId = publicationForm->pubowner;
|
||||
|
||||
char *publicationOwnerName = GetUserNameFromId(publicationOwnerId, false);
|
||||
|
||||
StringInfo alterCommand = makeStringInfo();
|
||||
appendStringInfo(alterCommand, "ALTER PUBLICATION %s OWNER TO %s",
|
||||
quote_identifier(publicationName),
|
||||
quote_identifier(publicationOwnerName));
|
||||
|
||||
ReleaseSysCache(publicationTuple);
|
||||
|
||||
return alterCommand->data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ShouldPropagateCreatePublication tests if we need to propagate a CREATE PUBLICATION
|
||||
* statement.
|
||||
*/
|
||||
static bool
|
||||
ShouldPropagateCreatePublication(CreatePublicationStmt *stmt)
|
||||
{
|
||||
if (!ShouldPropagate())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ShouldPropagateCreateInCoordinatedTransction())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AlterPublicationStmtObjectAddress generates the object address for the
|
||||
* publication altered by a regular ALTER PUBLICATION .. statement.
|
||||
*/
|
||||
List *
|
||||
AlterPublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
|
||||
{
|
||||
AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
|
||||
|
||||
return ObjectAddressForPublicationName(stmt->pubname, missingOk);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AlterPublicationOwnerStmtObjectAddress generates the object address for the
|
||||
* publication altered by the given ALTER PUBLICATION .. OWNER TO statement.
|
||||
*/
|
||||
List *
|
||||
AlterPublicationOwnerStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
|
||||
{
|
||||
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
|
||||
|
||||
return ObjectAddressForPublicationName(strVal(stmt->object), missingOk);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreatePublicationStmtObjectAddress generates the object address for the
|
||||
* publication created by the given CREATE PUBLICATION statement.
|
||||
*/
|
||||
List *
|
||||
CreatePublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
|
||||
{
|
||||
CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
|
||||
|
||||
return ObjectAddressForPublicationName(stmt->pubname, missingOk);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RenamePublicationStmtObjectAddress generates the object address for the
|
||||
* publication altered by the given ALTER PUBLICATION .. RENAME TO statement.
|
||||
*/
|
||||
List *
|
||||
RenamePublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostprocess)
|
||||
{
|
||||
RenameStmt *stmt = castNode(RenameStmt, node);
|
||||
|
||||
return ObjectAddressForPublicationName(strVal(stmt->object), missingOk);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ObjectAddressForPublicationName returns the object address for a given publication
|
||||
* name.
|
||||
*/
|
||||
static List *
|
||||
ObjectAddressForPublicationName(char *publicationName, bool missingOk)
|
||||
{
|
||||
Oid publicationId = InvalidOid;
|
||||
|
||||
HeapTuple publicationTuple =
|
||||
SearchSysCache1(PUBLICATIONNAME, CStringGetDatum(publicationName));
|
||||
if (HeapTupleIsValid(publicationTuple))
|
||||
{
|
||||
Form_pg_publication publicationForm =
|
||||
(Form_pg_publication) GETSTRUCT(publicationTuple);
|
||||
publicationId = publicationForm->oid;
|
||||
|
||||
ReleaseSysCache(publicationTuple);
|
||||
}
|
||||
else if (!missingOk)
|
||||
{
|
||||
/* it should have just been created */
|
||||
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT),
|
||||
errmsg("publication \"%s\" does not exist", publicationName)));
|
||||
}
|
||||
|
||||
ObjectAddress *address = palloc0(sizeof(ObjectAddress));
|
||||
ObjectAddressSet(*address, PublicationRelationId, publicationId);
|
||||
|
||||
return list_make1(address);
|
||||
}
|
|
@ -33,7 +33,8 @@
|
|||
|
||||
/* Local functions forward declarations for helper functions */
|
||||
static bool OptionsSpecifyOwnedBy(List *optionList, Oid *ownedByTableId);
|
||||
static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress);
|
||||
static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char
|
||||
depType);
|
||||
static List * FilterDistributedSequences(GrantStmt *stmt);
|
||||
|
||||
|
||||
|
@ -183,7 +184,7 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList,
|
|||
|
||||
char *columnName = NameStr(attributeForm->attname);
|
||||
List *columnOwnedSequences =
|
||||
getOwnedSequences_internal(relationId, attributeIndex + 1, 0);
|
||||
getOwnedSequences_internal(relationId, attributeIndex + 1, DEPENDENCY_AUTO);
|
||||
|
||||
if (attributeForm->atthasdef && list_length(columnOwnedSequences) == 0)
|
||||
{
|
||||
|
@ -453,21 +454,22 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString,
|
|||
/* the code-path only supports a single object */
|
||||
Assert(list_length(addresses) == 1);
|
||||
|
||||
/* We have already asserted that we have exactly 1 address in the addresses. */
|
||||
ObjectAddress *address = linitial(addresses);
|
||||
|
||||
/* error out if the sequence is distributed */
|
||||
if (IsAnyObjectDistributed(addresses))
|
||||
if (IsAnyObjectDistributed(addresses) || SequenceUsedInDistributedTable(address,
|
||||
DEPENDENCY_INTERNAL))
|
||||
{
|
||||
ereport(ERROR, (errmsg(
|
||||
"Altering a distributed sequence is currently not supported.")));
|
||||
}
|
||||
|
||||
/* We have already asserted that we have exactly 1 address in the addresses. */
|
||||
ObjectAddress *address = linitial(addresses);
|
||||
|
||||
/*
|
||||
* error out if the sequence is used in a distributed table
|
||||
* and this is an ALTER SEQUENCE .. AS .. statement
|
||||
*/
|
||||
Oid citusTableId = SequenceUsedInDistributedTable(address);
|
||||
Oid citusTableId = SequenceUsedInDistributedTable(address, DEPENDENCY_AUTO);
|
||||
if (citusTableId != InvalidOid)
|
||||
{
|
||||
List *options = stmt->options;
|
||||
|
@ -497,16 +499,19 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString,
|
|||
* SequenceUsedInDistributedTable returns the Oid of a distributed table whose
* column uses the given sequence, and InvalidOid if there is no such table.
* See DependencyType for the possible values of depType.
* We use DEPENDENCY_INTERNAL for sequences created by identity columns,
* and DEPENDENCY_AUTO for regular sequences.
|
||||
*/
|
||||
static Oid
|
||||
SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress)
|
||||
SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char depType)
|
||||
{
|
||||
List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE);
|
||||
Oid citusTableId = InvalidOid;
|
||||
foreach_oid(citusTableId, citusTableIdList)
|
||||
{
|
||||
List *seqInfoList = NIL;
|
||||
GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0);
|
||||
GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, depType);
|
||||
SequenceInfo *seqInfo = NULL;
|
||||
foreach_ptr(seqInfo, seqInfoList)
|
||||
{
|
||||
|
|
|
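The net effect of extending the guard above can be sketched as follows (names are hypothetical, and whether the guard fires depends on how the sequence is associated with the distributed table); the error text matches the message raised in PreprocessAlterSequenceStmt:

    CREATE TABLE orders (id bigserial, total int);
    SELECT create_distributed_table('orders', 'total');
    ALTER SEQUENCE orders_id_seq MAXVALUE 100000;
    -- expected to fail with: Altering a distributed sequence is currently not supported.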
@ -75,7 +75,7 @@ static void DistributePartitionUsingParent(Oid parentRelationId,
|
|||
static void ErrorIfMultiLevelPartitioning(Oid parentRelationId, Oid partitionRelationId);
|
||||
static void ErrorIfAttachCitusTableToPgLocalTable(Oid parentRelationId,
|
||||
Oid partitionRelationId);
|
||||
static bool AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(
|
||||
static bool ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(
|
||||
AlterTableStmt *alterTableStatement);
|
||||
static bool ShouldMarkConnectedRelationsNotAutoConverted(Oid leftRelationId,
|
||||
Oid rightRelationId);
|
||||
|
@ -1119,7 +1119,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
|
|||
|
||||
if (ShouldEnableLocalReferenceForeignKeys() &&
|
||||
processUtilityContext != PROCESS_UTILITY_SUBCOMMAND &&
|
||||
AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(alterTableStatement))
|
||||
ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(alterTableStatement))
|
||||
{
|
||||
/*
|
||||
* We don't process subcommands generated by postgres.
|
||||
|
@ -1378,29 +1378,6 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We check for ADD COLUMN .. GENERATED .. AS IDENTITY expr
|
||||
* since it uses a sequence as an internal dependency
|
||||
* we should deparse the statement
|
||||
*/
|
||||
constraint = NULL;
|
||||
foreach_ptr(constraint, columnConstraints)
|
||||
{
|
||||
if (constraint->contype == CONSTR_IDENTITY)
|
||||
{
|
||||
deparseAT = true;
|
||||
useInitialDDLCommandString = false;
|
||||
|
||||
/*
|
||||
* Since we don't support constraints for AT_AddColumn
|
||||
* we have to set is_not_null to true explicitly for identity columns
|
||||
*/
|
||||
ColumnDef *newColDef = copyObject(columnDefinition);
|
||||
newColDef->constraints = NULL;
|
||||
newColDef->is_not_null = true;
|
||||
newCmd->def = (Node *) newColDef;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We check for ADD COLUMN .. SERIAL pseudo-type
|
||||
|
@ -1584,12 +1561,12 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
|
|||
|
||||
|
||||
/*
|
||||
* AlterTableDefinesFKeyBetweenPostgresAndNonDistTable returns true if given
|
||||
* ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef returns true if given
|
||||
* alter table command defines foreign key between a postgres table and a
|
||||
* reference or citus local table.
|
||||
*/
|
||||
static bool
|
||||
AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableStatement)
|
||||
ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(AlterTableStmt *alterTableStatement)
|
||||
{
|
||||
List *foreignKeyConstraintList =
|
||||
GetAlterTableAddFKeyConstraintList(alterTableStatement);
|
||||
|
@ -1607,9 +1584,12 @@ AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableSt
|
|||
if (!IsCitusTable(leftRelationId))
|
||||
{
|
||||
return RelationIdListContainsCitusTableType(rightRelationIdList,
|
||||
CITUS_TABLE_WITH_NO_DIST_KEY);
|
||||
CITUS_LOCAL_TABLE) ||
|
||||
RelationIdListContainsCitusTableType(rightRelationIdList,
|
||||
REFERENCE_TABLE);
|
||||
}
|
||||
else if (IsCitusTableType(leftRelationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
else if (IsCitusTableType(leftRelationId, CITUS_LOCAL_TABLE) ||
|
||||
IsCitusTableType(leftRelationId, REFERENCE_TABLE))
|
||||
{
|
||||
return RelationIdListContainsPostgresTable(rightRelationIdList);
|
||||
}
|
||||
|
@ -2539,34 +2519,6 @@ PostprocessAlterTableStmt(AlterTableStmt *alterTableStatement)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We check for ADD COLUMN .. GENERATED AS IDENTITY expr
|
||||
* since it uses a seqeunce as an internal dependency
|
||||
*/
|
||||
constraint = NULL;
|
||||
foreach_ptr(constraint, columnConstraints)
|
||||
{
|
||||
if (constraint->contype == CONSTR_IDENTITY)
|
||||
{
|
||||
AttrNumber attnum = get_attnum(relationId,
|
||||
columnDefinition->colname);
|
||||
bool missing_ok = false;
|
||||
Oid seqOid = getIdentitySequence(relationId, attnum, missing_ok);
|
||||
|
||||
if (ShouldSyncTableMetadata(relationId))
|
||||
{
|
||||
needMetadataSyncForNewSequences = true;
|
||||
alterTableDefaultNextvalCmd =
|
||||
GetAddColumnWithNextvalDefaultCmd(seqOid,
|
||||
relationId,
|
||||
columnDefinition
|
||||
->colname,
|
||||
columnDefinition
|
||||
->typeName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We check for ALTER COLUMN .. SET DEFAULT nextval('user_defined_seq')
|
||||
|
@ -3222,6 +3174,17 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
|
|||
{
|
||||
if (columnConstraint->contype == CONSTR_IDENTITY)
|
||||
{
|
||||
/*
|
||||
* We currently don't support adding an identity column for an MX table
|
||||
*/
|
||||
if (ShouldSyncTableMetadata(relationId))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"cannot execute ADD COLUMN commands involving identity"
|
||||
" columns when metadata is synchronized to workers")));
|
||||
}
|
||||
|
||||
/*
|
||||
* Currently we don't support backfilling the new identity column with default values
|
||||
* if the table is not empty
|
||||
|
@ -3352,7 +3315,8 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
|
|||
*/
|
||||
AttrNumber attnum = get_attnum(relationId, command->name);
|
||||
List *seqInfoList = NIL;
|
||||
GetDependentSequencesWithRelation(relationId, &seqInfoList, attnum);
|
||||
GetDependentSequencesWithRelation(relationId, &seqInfoList, attnum,
|
||||
DEPENDENCY_AUTO);
|
||||
if (seqInfoList != NIL)
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot execute ALTER COLUMN TYPE .. command "
|
||||
|
@ -3666,7 +3630,7 @@ SetupExecutionModeForAlterTable(Oid relationId, AlterTableCmd *command)
|
|||
* sequential mode.
|
||||
*/
|
||||
if (executeSequentially &&
|
||||
!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) &&
|
||||
HasDistributionKey(relationId) &&
|
||||
ParallelQueryExecutedInTransaction())
|
||||
{
|
||||
char *relationName = get_rel_name(relationId);
|
||||
|
@ -4011,3 +3975,59 @@ MakeNameListFromRangeVar(const RangeVar *rel)
|
|||
return list_make1(makeString(rel->relname));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfTableHasUnsupportedIdentityColumn errors out if the given table has
* any identity column other than a bigint identity column.
|
||||
*/
|
||||
void
|
||||
ErrorIfTableHasUnsupportedIdentityColumn(Oid relationId)
|
||||
{
|
||||
Relation relation = relation_open(relationId, AccessShareLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(relation);
|
||||
|
||||
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
|
||||
attributeIndex++)
|
||||
{
|
||||
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
|
||||
|
||||
if (attributeForm->attidentity && attributeForm->atttypid != INT8OID)
|
||||
{
|
||||
char *qualifiedRelationName = generate_qualified_relation_name(relationId);
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"cannot complete operation on %s with smallint/int identity column",
|
||||
qualifiedRelationName),
|
||||
errhint(
|
||||
"Use bigint identity column instead.")));
|
||||
}
|
||||
}
|
||||
|
||||
relation_close(relation, NoLock);
|
||||
}
|
||||
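A hedged sketch of what this check accepts and rejects (table names are hypothetical; the error and hint correspond to the ereport above):

    -- accepted: bigint identity column
    CREATE TABLE ok_table (id bigint GENERATED ALWAYS AS IDENTITY, payload text);
    SELECT create_distributed_table('ok_table', 'payload');

    -- rejected: int identity column
    CREATE TABLE bad_table (id int GENERATED ALWAYS AS IDENTITY, payload text);
    SELECT create_distributed_table('bad_table', 'payload');
    -- ERROR:  cannot complete operation on public.bad_table with smallint/int identity column
    -- HINT:  Use bigint identity column instead.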
|
||||
|
||||
/*
|
||||
* ErrorIfTableHasIdentityColumn errors out if the given table has an identity column
|
||||
*/
|
||||
void
|
||||
ErrorIfTableHasIdentityColumn(Oid relationId)
|
||||
{
|
||||
Relation relation = relation_open(relationId, AccessShareLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(relation);
|
||||
|
||||
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
|
||||
attributeIndex++)
|
||||
{
|
||||
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
|
||||
|
||||
if (attributeForm->attidentity)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"cannot complete operation on a table with identity column")));
|
||||
}
|
||||
}
|
||||
|
||||
relation_close(relation, NoLock);
|
||||
}
|
||||
|
|
|
@ -324,7 +324,7 @@ ExecuteTruncateStmtSequentialIfNecessary(TruncateStmt *command)
|
|||
{
|
||||
Oid relationId = RangeVarGetRelid(rangeVar, NoLock, failOK);
|
||||
|
||||
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) &&
|
||||
if (IsCitusTable(relationId) && !HasDistributionKey(relationId) &&
|
||||
TableReferenced(relationId))
|
||||
{
|
||||
char *relationName = get_rel_name(relationId);
|
||||
|
|
|
@ -53,6 +53,7 @@
|
|||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/deparser.h"
|
||||
#include "distributed/deparse_shard_query.h"
|
||||
#include "distributed/executor_util.h"
|
||||
#include "distributed/foreign_key_relationship.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/local_executor.h"
|
||||
|
|
|
@ -1202,6 +1202,17 @@ FinishConnectionEstablishment(MultiConnection *connection)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ForceConnectionCloseAtTransactionEnd marks connection to be closed at the end of the
|
||||
* transaction.
|
||||
*/
|
||||
void
|
||||
ForceConnectionCloseAtTransactionEnd(MultiConnection *connection)
|
||||
{
|
||||
connection->forceCloseAtTransactionEnd = true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ClaimConnectionExclusively signals that this connection is actively being
|
||||
* used. That means it'll not be, again, returned by
|
||||
|
@ -1484,6 +1495,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit)
|
|||
* - Current cached connections is already at MaxCachedConnectionsPerWorker
|
||||
* - Connection is forced to close at the end of transaction
|
||||
* - Connection is not in OK state
|
||||
* - Connection has a replication origin setup
|
||||
* - A transaction is still in progress (usually because we are cancelling a distributed transaction)
|
||||
* - A connection reached its maximum lifetime
|
||||
*/
|
||||
|
@ -1503,6 +1515,7 @@ ShouldShutdownConnection(MultiConnection *connection, const int cachedConnection
|
|||
PQstatus(connection->pgConn) != CONNECTION_OK ||
|
||||
!RemoteTransactionIdle(connection) ||
|
||||
connection->requiresReplication ||
|
||||
connection->isReplicationOriginSessionSetup ||
|
||||
(MaxCachedConnectionLifetime >= 0 &&
|
||||
MillisecondsToTimeout(connection->connectionEstablishmentStart,
|
||||
MaxCachedConnectionLifetime) <= 0);
|
||||
|
|
|
@ -573,6 +573,47 @@ SendRemoteCommand(MultiConnection *connection, const char *command)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ExecuteRemoteCommandAndCheckResult executes the given command in the remote node and
|
||||
* checks if the result is equal to the expected result. If the result is equal to the
|
||||
* expected result, the function returns true, otherwise it returns false.
|
||||
*/
|
||||
bool
|
||||
ExecuteRemoteCommandAndCheckResult(MultiConnection *connection, char *command,
|
||||
char *expected)
|
||||
{
|
||||
if (!SendRemoteCommand(connection, command))
|
||||
{
|
||||
/* if we cannot connect, we warn and report false */
|
||||
ReportConnectionError(connection, WARNING);
|
||||
return false;
|
||||
}
|
||||
bool raiseInterrupts = true;
|
||||
PGresult *queryResult = GetRemoteCommandResult(connection, raiseInterrupts);
|
||||
|
||||
/* if remote node throws an error, we also throw an error */
|
||||
if (!IsResponseOK(queryResult))
|
||||
{
|
||||
ReportResultError(connection, queryResult, ERROR);
|
||||
}
|
||||
|
||||
StringInfo queryResultString = makeStringInfo();
|
||||
|
||||
/* Evaluate the queryResult and store it into the queryResultString */
|
||||
bool success = EvaluateSingleQueryResult(connection, queryResult, queryResultString);
|
||||
bool result = false;
|
||||
if (success && strcmp(queryResultString->data, expected) == 0)
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
|
||||
PQclear(queryResult);
|
||||
ForgetResults(connection);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReadFirstColumnAsText reads the first column of result tuples from the given
|
||||
* PGresult struct and returns them in a StringInfo list.
|
||||
|
|
|
@ -304,10 +304,7 @@ pg_get_sequencedef(Oid sequenceRelationId)
|
|||
* When it's WORKER_NEXTVAL_SEQUENCE_DEFAULTS, the function creates the DEFAULT
|
||||
* clause using worker_nextval('sequence') and not nextval('sequence')
|
||||
* When IncludeIdentities is NO_IDENTITY, the function does not include identity column
|
||||
* specifications. When it's INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, the function
|
||||
* uses sequences and set them as default values for identity columns by using exactly
|
||||
* the same approach with worker_nextval('sequence') & nextval('sequence') logic
|
||||
* desribed above. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses.
|
||||
* specifications. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTITY clauses.
|
||||
*/
|
||||
char *
|
||||
pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
|
||||
|
@ -403,26 +400,9 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
|
|||
Oid seqOid = getIdentitySequence(RelationGetRelid(relation),
|
||||
attributeForm->attnum, missing_ok);
|
||||
|
||||
char *sequenceName = generate_qualified_relation_name(seqOid);
|
||||
|
||||
if (includeIdentityDefaults == INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS)
|
||||
{
|
||||
if (pg_get_sequencedef(seqOid)->seqtypid != INT8OID)
|
||||
{
|
||||
appendStringInfo(&buffer,
|
||||
" DEFAULT worker_nextval(%s::regclass)",
|
||||
quote_literal_cstr(sequenceName));
|
||||
}
|
||||
else
|
||||
{
|
||||
appendStringInfo(&buffer, " DEFAULT nextval(%s::regclass)",
|
||||
quote_literal_cstr(sequenceName));
|
||||
}
|
||||
}
|
||||
else if (includeIdentityDefaults == INCLUDE_IDENTITY)
|
||||
if (includeIdentityDefaults == INCLUDE_IDENTITY)
|
||||
{
|
||||
Form_pg_sequence pgSequenceForm = pg_get_sequencedef(seqOid);
|
||||
uint64 sequenceStart = nextval_internal(seqOid, false);
|
||||
char *sequenceDef = psprintf(
|
||||
" GENERATED %s AS IDENTITY (INCREMENT BY " INT64_FORMAT \
|
||||
" MINVALUE " INT64_FORMAT " MAXVALUE "
|
||||
|
@ -433,7 +413,8 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
|
|||
"ALWAYS" : "BY DEFAULT",
|
||||
pgSequenceForm->seqincrement,
|
||||
pgSequenceForm->seqmin,
|
||||
pgSequenceForm->seqmax, sequenceStart,
|
||||
pgSequenceForm->seqmax,
|
||||
pgSequenceForm->seqstart,
|
||||
pgSequenceForm->seqcache,
|
||||
pgSequenceForm->seqcycle ? "" : "NO ");
|
||||
|
||||
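With INCLUDE_IDENTITY the deparsed column definition now carries a full identity clause built from the pg_sequence fields above. An illustrative, not byte-exact, example of the output for a bigint identity column:

    id bigint GENERATED BY DEFAULT AS IDENTITY
        (INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE)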
|
@ -1391,7 +1372,7 @@ convert_aclright_to_string(int aclright)
|
|||
|
||||
/*
|
||||
* contain_nextval_expression_walker walks over expression tree and returns
|
||||
* true if it contains call to 'nextval' function.
|
||||
* true if it contains a call to the 'nextval' function or an identity column.
|
||||
*/
|
||||
bool
|
||||
contain_nextval_expression_walker(Node *node, void *context)
|
||||
|
@ -1401,6 +1382,13 @@ contain_nextval_expression_walker(Node *node, void *context)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* check if the node contains an identity column */
|
||||
if (IsA(node, NextValueExpr))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/* check if the node contains call to 'nextval' */
|
||||
if (IsA(node, FuncExpr))
|
||||
{
|
||||
FuncExpr *funcExpr = (FuncExpr *) node;
|
||||
|
|
|
@ -0,0 +1,690 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* deparse_publication_stmts.c
|
||||
* All routines to deparse publication statements.
|
||||
*
|
||||
* Copyright (c) Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/relation.h"
|
||||
#include "catalog/namespace.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/deparser.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/namespace_utils.h"
|
||||
#include "lib/stringinfo.h"
|
||||
#include "parser/parse_clause.h"
|
||||
#include "parser/parse_collate.h"
|
||||
#include "parser/parse_node.h"
|
||||
#include "parser/parse_relation.h"
|
||||
#include "nodes/value.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/ruleutils.h"
|
||||
|
||||
|
||||
static void AppendCreatePublicationStmt(StringInfo buf, CreatePublicationStmt *stmt,
|
||||
bool whereClauseNeedsTransform,
|
||||
bool includeLocalTables);
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
static bool AppendPublicationObjects(StringInfo buf, List *publicationObjects,
|
||||
bool whereClauseNeedsTransform,
|
||||
bool includeLocalTables);
|
||||
static void AppendWhereClauseExpression(StringInfo buf, RangeVar *tableName,
|
||||
Node *whereClause,
|
||||
bool whereClauseNeedsTransform);
|
||||
static void AppendAlterPublicationAction(StringInfo buf, AlterPublicationAction action);
|
||||
#else
|
||||
static bool AppendTables(StringInfo buf, List *tables, bool includeLocalTables);
|
||||
static void AppendDefElemAction(StringInfo buf, DefElemAction action);
|
||||
#endif
|
||||
static bool AppendAlterPublicationStmt(StringInfo buf, AlterPublicationStmt *stmt,
|
||||
bool whereClauseNeedsTransform,
|
||||
bool includeLocalTables);
|
||||
static void AppendDropPublicationStmt(StringInfo buf, DropStmt *stmt);
|
||||
static void AppendRenamePublicationStmt(StringInfo buf, RenameStmt *stmt);
|
||||
static void AppendAlterPublicationOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt);
|
||||
static void AppendPublicationOptions(StringInfo stringBuffer, List *optionList);
|
||||
static void AppendIdentifierList(StringInfo buf, List *objects);
|
||||
|
||||
|
||||
/*
|
||||
* DeparseCreatePublicationStmt builds and returns a string representing a
|
||||
* CreatePublicationStmt.
|
||||
*/
|
||||
char *
|
||||
DeparseCreatePublicationStmt(Node *node)
|
||||
{
|
||||
/* regular deparsing function takes CREATE PUBLICATION from the parser */
|
||||
bool whereClauseNeedsTransform = false;
|
||||
|
||||
/* for regular CREATE PUBLICATION we do not propagate local tables */
|
||||
bool includeLocalTables = false;
|
||||
|
||||
return DeparseCreatePublicationStmtExtended(node, whereClauseNeedsTransform,
|
||||
includeLocalTables);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeparseCreatePublicationStmtExtended builds and returns a string representing a
|
||||
* CreatePublicationStmt, which may have already-transformed expressions.
|
||||
*/
|
||||
char *
|
||||
DeparseCreatePublicationStmtExtended(Node *node, bool whereClauseNeedsTransform,
|
||||
bool includeLocalTables)
|
||||
{
|
||||
CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
|
||||
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
AppendCreatePublicationStmt(&str, stmt, whereClauseNeedsTransform,
|
||||
includeLocalTables);
|
||||
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendCreatePublicationStmt appends a string representing a
|
||||
* CreatePublicationStmt to a buffer.
|
||||
*/
|
||||
static void
|
||||
AppendCreatePublicationStmt(StringInfo buf, CreatePublicationStmt *stmt,
|
||||
bool whereClauseNeedsTransform,
|
||||
bool includeLocalTables)
|
||||
{
|
||||
appendStringInfo(buf, "CREATE PUBLICATION %s",
|
||||
quote_identifier(stmt->pubname));
|
||||
|
||||
if (stmt->for_all_tables)
|
||||
{
|
||||
appendStringInfoString(buf, " FOR ALL TABLES");
|
||||
}
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
else if (stmt->pubobjects != NIL)
|
||||
{
|
||||
bool hasObjects = false;
|
||||
PublicationObjSpec *publicationObject = NULL;
|
||||
|
||||
/*
|
||||
* Check whether there are objects to propagate, mainly to know whether
|
||||
* we should include "FOR".
|
||||
*/
|
||||
foreach_ptr(publicationObject, stmt->pubobjects)
|
||||
{
|
||||
if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
|
||||
{
|
||||
/* FOR TABLE ... */
|
||||
PublicationTable *publicationTable = publicationObject->pubtable;
|
||||
|
||||
if (includeLocalTables ||
|
||||
IsCitusTableRangeVar(publicationTable->relation, NoLock, false))
|
||||
{
|
||||
hasObjects = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
hasObjects = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasObjects)
|
||||
{
|
||||
appendStringInfoString(buf, " FOR");
|
||||
AppendPublicationObjects(buf, stmt->pubobjects, whereClauseNeedsTransform,
|
||||
includeLocalTables);
|
||||
}
|
||||
}
|
||||
#else
|
||||
else if (stmt->tables != NIL)
|
||||
{
|
||||
bool hasTables = false;
|
||||
RangeVar *rangeVar = NULL;
|
||||
|
||||
/*
|
||||
* Check whether there are tables to propagate, mainly to know whether
|
||||
* we should include "FOR".
|
||||
*/
|
||||
foreach_ptr(rangeVar, stmt->tables)
|
||||
{
|
||||
if (includeLocalTables || IsCitusTableRangeVar(rangeVar, NoLock, false))
|
||||
{
|
||||
hasTables = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasTables)
|
||||
{
|
||||
appendStringInfoString(buf, " FOR");
|
||||
AppendTables(buf, stmt->tables, includeLocalTables);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (stmt->options != NIL)
|
||||
{
|
||||
appendStringInfoString(buf, " WITH (");
|
||||
AppendPublicationOptions(buf, stmt->options);
|
||||
appendStringInfoString(buf, ")");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
|
||||
/*
|
||||
* AppendPublicationObjects appends a string representing a list of publication
|
||||
* objects to a buffer.
|
||||
*
|
||||
* For instance: TABLE users, departments, TABLES IN SCHEMA production
|
||||
*/
|
||||
static bool
|
||||
AppendPublicationObjects(StringInfo buf, List *publicationObjects,
|
||||
bool whereClauseNeedsTransform,
|
||||
bool includeLocalTables)
|
||||
{
|
||||
PublicationObjSpec *publicationObject = NULL;
|
||||
bool appendedObject = false;
|
||||
|
||||
foreach_ptr(publicationObject, publicationObjects)
|
||||
{
|
||||
if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
|
||||
{
|
||||
/* FOR TABLE ... */
|
||||
PublicationTable *publicationTable = publicationObject->pubtable;
|
||||
RangeVar *rangeVar = publicationTable->relation;
|
||||
char *schemaName = rangeVar->schemaname;
|
||||
char *tableName = rangeVar->relname;
|
||||
|
||||
if (!includeLocalTables && !IsCitusTableRangeVar(rangeVar, NoLock, false))
|
||||
{
|
||||
/* do not propagate local tables */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (schemaName != NULL)
|
||||
{
|
||||
/* qualified table name */
|
||||
appendStringInfo(buf, "%s TABLE %s",
|
||||
appendedObject ? "," : "",
|
||||
quote_qualified_identifier(schemaName, tableName));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* unqualified table name */
|
||||
appendStringInfo(buf, "%s TABLE %s",
|
||||
appendedObject ? "," : "",
|
||||
quote_identifier(tableName));
|
||||
}
|
||||
|
||||
if (publicationTable->columns != NIL)
|
||||
{
|
||||
appendStringInfoString(buf, " (");
|
||||
AppendIdentifierList(buf, publicationTable->columns);
|
||||
appendStringInfoString(buf, ")");
|
||||
}
|
||||
|
||||
if (publicationTable->whereClause != NULL)
|
||||
{
|
||||
appendStringInfoString(buf, " WHERE (");
|
||||
|
||||
AppendWhereClauseExpression(buf, rangeVar,
|
||||
publicationTable->whereClause,
|
||||
whereClauseNeedsTransform);
|
||||
|
||||
appendStringInfoString(buf, ")");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* FOR TABLES IN SCHEMA */
|
||||
char *schemaName = publicationObject->name;
|
||||
|
||||
if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLES_IN_CUR_SCHEMA)
|
||||
{
|
||||
List *searchPath = fetch_search_path(false);
|
||||
if (searchPath == NIL)
|
||||
{
|
||||
ereport(ERROR, errcode(ERRCODE_UNDEFINED_SCHEMA),
|
||||
errmsg("no schema has been selected for "
|
||||
"CURRENT_SCHEMA"));
|
||||
}
|
||||
|
||||
schemaName = get_namespace_name(linitial_oid(searchPath));
|
||||
}
|
||||
|
||||
appendStringInfo(buf, "%s TABLES IN SCHEMA %s",
|
||||
appendedObject ? "," : "",
|
||||
quote_identifier(schemaName));
|
||||
}
|
||||
|
||||
appendedObject = true;
|
||||
}
|
||||
|
||||
return appendedObject;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendWhereClauseExpression appends a deparsed expression that can
|
||||
* contain a filter on the given table. If whereClauseNeedsTransform is set,
* the expression is first transformed.
|
||||
*/
|
||||
static void
|
||||
AppendWhereClauseExpression(StringInfo buf, RangeVar *tableName,
|
||||
Node *whereClause, bool whereClauseNeedsTransform)
|
||||
{
|
||||
Relation relation = relation_openrv(tableName, AccessShareLock);
|
||||
|
||||
if (whereClauseNeedsTransform)
|
||||
{
|
||||
ParseState *pstate = make_parsestate(NULL);
|
||||
pstate->p_sourcetext = "";
|
||||
ParseNamespaceItem *nsitem = addRangeTableEntryForRelation(pstate,
|
||||
relation,
|
||||
AccessShareLock, NULL,
|
||||
false, false);
|
||||
addNSItemToQuery(pstate, nsitem, false, true, true);
|
||||
|
||||
whereClause = transformWhereClause(pstate,
|
||||
copyObject(whereClause),
|
||||
EXPR_KIND_WHERE,
|
||||
"PUBLICATION WHERE");
|
||||
|
||||
assign_expr_collations(pstate, whereClause);
|
||||
}
|
||||
|
||||
List *relationContext = deparse_context_for(tableName->relname, relation->rd_id);
|
||||
|
||||
PushOverrideEmptySearchPath(CurrentMemoryContext);
|
||||
char *whereClauseString = deparse_expression(whereClause,
|
||||
relationContext,
|
||||
true, true);
|
||||
PopOverrideSearchPath();
|
||||
|
||||
appendStringInfoString(buf, whereClauseString);
|
||||
|
||||
relation_close(relation, AccessShareLock);
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* AppendTables appends a string representing a list of tables
* to a buffer.
|
||||
*
|
||||
* For instance: TABLE users, departments
|
||||
*/
|
||||
static bool
|
||||
AppendTables(StringInfo buf, List *tables, bool includeLocalTables)
|
||||
{
|
||||
RangeVar *rangeVar = NULL;
|
||||
bool appendedObject = false;
|
||||
|
||||
foreach_ptr(rangeVar, tables)
|
||||
{
|
||||
if (!includeLocalTables &&
|
||||
!IsCitusTableRangeVar(rangeVar, NoLock, false))
|
||||
{
|
||||
/* do not propagate local tables */
|
||||
continue;
|
||||
}
|
||||
|
||||
char *schemaName = rangeVar->schemaname;
|
||||
char *tableName = rangeVar->relname;
|
||||
|
||||
if (schemaName != NULL)
|
||||
{
|
||||
/* qualified table name */
|
||||
appendStringInfo(buf, "%s %s",
|
||||
appendedObject ? "," : " TABLE",
|
||||
quote_qualified_identifier(schemaName, tableName));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* unqualified table name */
|
||||
appendStringInfo(buf, "%s %s",
|
||||
appendedObject ? "," : " TABLE",
|
||||
quote_identifier(tableName));
|
||||
}
|
||||
|
||||
appendedObject = true;
|
||||
}
|
||||
|
||||
return appendedObject;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* DeparseAlterPublicationStmt builds and returns a string representing
|
||||
* an AlterPublicationStmt.
|
||||
*/
|
||||
char *
|
||||
DeparseAlterPublicationStmt(Node *node)
|
||||
{
|
||||
/* regular deparsing function takes ALTER PUBLICATION from the parser */
|
||||
bool whereClauseNeedsTransform = true;
|
||||
|
||||
/* for regular ALTER PUBLICATION we do not propagate local tables */
|
||||
bool includeLocalTables = false;
|
||||
|
||||
return DeparseAlterPublicationStmtExtended(node, whereClauseNeedsTransform,
|
||||
includeLocalTables);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeparseAlterPublicationStmtExtended builds and returns a string representing a
|
||||
* AlterPublicationStmt, which may have already-transformed expressions.
|
||||
*/
|
||||
char *
|
||||
DeparseAlterPublicationStmtExtended(Node *node, bool whereClauseNeedsTransform,
|
||||
bool includeLocalTables)
|
||||
{
|
||||
AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
if (!AppendAlterPublicationStmt(&str, stmt, whereClauseNeedsTransform,
|
||||
includeLocalTables))
|
||||
{
|
||||
Assert(!includeLocalTables);
|
||||
|
||||
/*
|
||||
* When there are no objects to propagate, then there is no
|
||||
* valid ALTER PUBLICATION to construct.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendAlterPublicationStmt appends a string representing an AlterPublicationStmt
|
||||
* of the form ALTER PUBLICATION .. ADD/SET/DROP
|
||||
*/
|
||||
static bool
|
||||
AppendAlterPublicationStmt(StringInfo buf, AlterPublicationStmt *stmt,
|
||||
bool whereClauseNeedsTransform,
|
||||
bool includeLocalTables)
|
||||
{
|
||||
appendStringInfo(buf, "ALTER PUBLICATION %s",
|
||||
quote_identifier(stmt->pubname));
|
||||
|
||||
if (stmt->options)
|
||||
{
|
||||
appendStringInfoString(buf, " SET (");
|
||||
AppendPublicationOptions(buf, stmt->options);
|
||||
appendStringInfoString(buf, ")");
|
||||
|
||||
/* changing options cannot be combined with other actions */
|
||||
return true;
|
||||
}
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
AppendAlterPublicationAction(buf, stmt->action);
|
||||
return AppendPublicationObjects(buf, stmt->pubobjects, whereClauseNeedsTransform,
|
||||
includeLocalTables);
|
||||
#else
|
||||
AppendDefElemAction(buf, stmt->tableAction);
|
||||
return AppendTables(buf, stmt->tables, includeLocalTables);
|
||||
#endif
|
||||
}
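/*
 * Editor's note: a minimal illustrative sketch, not part of this patch. It
 * shows the StringInfo pattern the deparse helpers above follow: start from
 * "ALTER PUBLICATION <name>", append the action keyword, then the object
 * list. The function name and the publication/table names are made-up
 * examples.
 */
static void
AlterPublicationDeparseSketch(void)
{
	StringInfoData buf;
	initStringInfo(&buf);

	appendStringInfo(&buf, "ALTER PUBLICATION %s", quote_identifier("my_pub"));
	appendStringInfoString(&buf, " ADD");
	appendStringInfo(&buf, " TABLE %s",
					 quote_qualified_identifier("public", "users"));

	/* buf.data now contains: ALTER PUBLICATION my_pub ADD TABLE public.users */
}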
|
||||
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
|
||||
/*
|
||||
* AppendAlterPublicationAction appends a string representing an AlterPublicationAction
|
||||
* to a buffer.
|
||||
*/
|
||||
static void
|
||||
AppendAlterPublicationAction(StringInfo buf, AlterPublicationAction action)
|
||||
{
|
||||
switch (action)
|
||||
{
|
||||
case AP_AddObjects:
|
||||
{
|
||||
appendStringInfoString(buf, " ADD");
|
||||
break;
|
||||
}
|
||||
|
||||
case AP_DropObjects:
|
||||
{
|
||||
appendStringInfoString(buf, " DROP");
|
||||
break;
|
||||
}
|
||||
|
||||
case AP_SetObjects:
|
||||
{
|
||||
appendStringInfoString(buf, " SET");
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
ereport(ERROR, (errmsg("unrecognized publication action: %d", action)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* AppendDefElemAction appends a string representing a DefElemAction
|
||||
* to a buffer.
|
||||
*/
|
||||
static void
|
||||
AppendDefElemAction(StringInfo buf, DefElemAction action)
|
||||
{
|
||||
switch (action)
|
||||
{
|
||||
case DEFELEM_ADD:
|
||||
{
|
||||
appendStringInfoString(buf, " ADD");
|
||||
break;
|
||||
}
|
||||
|
||||
case DEFELEM_DROP:
|
||||
{
|
||||
appendStringInfoString(buf, " DROP");
|
||||
break;
|
||||
}
|
||||
|
||||
case DEFELEM_SET:
|
||||
{
|
||||
appendStringInfoString(buf, " SET");
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
ereport(ERROR, (errmsg("unrecognized publication action: %d", action)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* DeparseDropPublicationStmt builds and returns a string representing the DropStmt
|
||||
*/
|
||||
char *
|
||||
DeparseDropPublicationStmt(Node *node)
|
||||
{
|
||||
DropStmt *stmt = castNode(DropStmt, node);
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
Assert(stmt->removeType == OBJECT_PUBLICATION);
|
||||
|
||||
AppendDropPublicationStmt(&str, stmt);
|
||||
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendDropPublicationStmt appends a string representing the DropStmt to a buffer
|
||||
*/
|
||||
static void
|
||||
AppendDropPublicationStmt(StringInfo buf, DropStmt *stmt)
|
||||
{
|
||||
appendStringInfoString(buf, "DROP PUBLICATION ");
|
||||
if (stmt->missing_ok)
|
||||
{
|
||||
appendStringInfoString(buf, "IF EXISTS ");
|
||||
}
|
||||
AppendIdentifierList(buf, stmt->objects);
|
||||
if (stmt->behavior == DROP_CASCADE)
|
||||
{
|
||||
appendStringInfoString(buf, " CASCADE");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeparseRenamePublicationStmt builds and returns a string representing the RenameStmt
|
||||
*/
|
||||
char *
|
||||
DeparseRenamePublicationStmt(Node *node)
|
||||
{
|
||||
RenameStmt *stmt = castNode(RenameStmt, node);
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
Assert(stmt->renameType == OBJECT_PUBLICATION);
|
||||
|
||||
AppendRenamePublicationStmt(&str, stmt);
|
||||
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendRenamePublicationStmt appends a string representing the RenameStmt to a buffer
|
||||
*/
|
||||
static void
|
||||
AppendRenamePublicationStmt(StringInfo buf, RenameStmt *stmt)
|
||||
{
|
||||
appendStringInfo(buf, "ALTER PUBLICATION %s RENAME TO %s;",
|
||||
quote_identifier(strVal(stmt->object)),
|
||||
quote_identifier(stmt->newname));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeparseAlterPublicationOwnerStmt builds and returns a string representing the AlterOwnerStmt
|
||||
*/
|
||||
char *
|
||||
DeparseAlterPublicationOwnerStmt(Node *node)
|
||||
{
|
||||
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
|
||||
StringInfoData str = { 0 };
|
||||
initStringInfo(&str);
|
||||
|
||||
Assert(stmt->objectType == OBJECT_PUBLICATION);
|
||||
|
||||
AppendAlterPublicationOwnerStmt(&str, stmt);
|
||||
|
||||
return str.data;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendAlterPublicationOwnerStmt appends a string representing the AlterOwnerStmt to a buffer
|
||||
*/
|
||||
static void
|
||||
AppendAlterPublicationOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt)
|
||||
{
|
||||
Assert(stmt->objectType == OBJECT_PUBLICATION);
|
||||
|
||||
appendStringInfo(buf, "ALTER PUBLICATION %s OWNER TO %s;",
|
||||
quote_identifier(strVal(stmt->object)),
|
||||
RoleSpecString(stmt->newowner, true));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AppendPublicationOptions appends a string representing a list of publication options.
|
||||
*/
|
||||
static void
|
||||
AppendPublicationOptions(StringInfo stringBuffer, List *optionList)
|
||||
{
|
||||
ListCell *optionCell = NULL;
|
||||
bool firstOptionPrinted = false;
|
||||
|
||||
foreach(optionCell, optionList)
|
||||
{
|
||||
DefElem *option = (DefElem *) lfirst(optionCell);
|
||||
char *optionName = option->defname;
|
||||
char *optionValue = defGetString(option);
|
||||
NodeTag valueType = nodeTag(option->arg);
|
||||
|
||||
if (firstOptionPrinted)
|
||||
{
|
||||
appendStringInfo(stringBuffer, ", ");
|
||||
}
|
||||
firstOptionPrinted = true;
|
||||
|
||||
appendStringInfo(stringBuffer, "%s = ",
|
||||
quote_identifier(optionName));
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
if (valueType == T_Integer || valueType == T_Float || valueType == T_Boolean)
|
||||
#else
|
||||
if (valueType == T_Integer || valueType == T_Float)
|
||||
#endif
|
||||
{
|
||||
/* string escaping is unnecessary for numeric types and can cause issues */
|
||||
appendStringInfo(stringBuffer, "%s", optionValue);
|
||||
}
|
||||
else
|
||||
{
|
||||
appendStringInfo(stringBuffer, "%s", quote_literal_cstr(optionValue));
|
||||
}
|
||||
}
|
||||
}
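/*
 * Editor's note: a hedged usage sketch, not part of this patch. For an option
 * list equivalent to (publish = 'insert, update'), AppendPublicationOptions
 * emits the text  publish = 'insert, update' . String values are quoted via
 * quote_literal_cstr, while integer, float and (on PG 15+) boolean values are
 * emitted bare. The function name is hypothetical and makeDefElem/makeString
 * from nodes/makefuncs.h are assumed to be available here.
 */
static void
PublicationOptionsSketch(void)
{
	StringInfoData optionBuf;
	initStringInfo(&optionBuf);

	DefElem *publishOption = makeDefElem("publish",
										 (Node *) makeString("insert, update"), -1);

	AppendPublicationOptions(&optionBuf, list_make1(publishOption));

	/* optionBuf.data now contains: publish = 'insert, update' */
}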
|
||||
|
||||
|
||||
/*
|
||||
* AppendIdentifierList appends a string representing a list of
|
||||
* identifiers (of String type).
|
||||
*/
|
||||
static void
|
||||
AppendIdentifierList(StringInfo buf, List *objects)
|
||||
{
|
||||
ListCell *objectCell = NULL;
|
||||
|
||||
foreach(objectCell, objects)
|
||||
{
|
||||
char *name = strVal(lfirst(objectCell));
|
||||
|
||||
if (objectCell != list_head(objects))
|
||||
{
|
||||
appendStringInfo(buf, ", ");
|
||||
}
|
||||
|
||||
appendStringInfoString(buf, quote_identifier(name));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* qualify_publication_stmt.c
|
||||
* Functions specialized in fully qualifying all publication statements. These
|
||||
* functions are dispatched from qualify.c
|
||||
*
|
||||
* Copyright (c), Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/namespace.h"
|
||||
#include "distributed/deparser.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "nodes/nodes.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
static void QualifyPublicationObjects(List *publicationObjects);
|
||||
#else
|
||||
static void QualifyTables(List *tables);
|
||||
#endif
|
||||
static void QualifyPublicationRangeVar(RangeVar *publication);
|
||||
|
||||
|
||||
/*
|
||||
* QualifyCreatePublicationStmt qualifies the table names of the
|
||||
* CREATE PUBLICATION statement.
|
||||
*/
|
||||
void
|
||||
QualifyCreatePublicationStmt(Node *node)
|
||||
{
|
||||
CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
QualifyPublicationObjects(stmt->pubobjects);
|
||||
#else
|
||||
QualifyTables(stmt->tables);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
|
||||
/*
|
||||
* QualifyPublicationObjects ensures all table names in a list of
|
||||
* publication objects are fully qualified.
|
||||
*/
|
||||
static void
|
||||
QualifyPublicationObjects(List *publicationObjects)
|
||||
{
|
||||
PublicationObjSpec *publicationObject = NULL;
|
||||
|
||||
foreach_ptr(publicationObject, publicationObjects)
|
||||
{
|
||||
if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
|
||||
{
|
||||
/* FOR TABLE ... */
|
||||
PublicationTable *publicationTable = publicationObject->pubtable;
|
||||
|
||||
QualifyPublicationRangeVar(publicationTable->relation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* QualifyTables ensures all table names in a list are fully qualified.
|
||||
*/
|
||||
static void
|
||||
QualifyTables(List *tables)
|
||||
{
|
||||
RangeVar *rangeVar = NULL;
|
||||
|
||||
foreach_ptr(rangeVar, tables)
|
||||
{
|
||||
QualifyPublicationRangeVar(rangeVar);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* QualifyAlterPublicationStmt qualifies the table names of the
|
||||
* ALTER PUBLICATION statement.
|
||||
*/
|
||||
void
|
||||
QualifyAlterPublicationStmt(Node *node)
|
||||
{
|
||||
AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
|
||||
|
||||
#if (PG_VERSION_NUM >= PG_VERSION_15)
|
||||
QualifyPublicationObjects(stmt->pubobjects);
|
||||
#else
|
||||
QualifyTables(stmt->tables);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* QualifyPublicationRangeVar qualifies the given publication RangeVar if it is not qualified.
|
||||
*/
|
||||
static void
|
||||
QualifyPublicationRangeVar(RangeVar *publication)
|
||||
{
|
||||
if (publication->schemaname == NULL)
|
||||
{
|
||||
Oid publicationOid = RelnameGetRelid(publication->relname);
|
||||
Oid schemaOid = get_rel_namespace(publicationOid);
|
||||
publication->schemaname = get_namespace_name(schemaOid);
|
||||
}
|
||||
}
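/*
 * Editor's note: a small illustrative sketch, not part of this patch. An
 * unqualified RangeVar for a table named "users" picks up its schema from the
 * catalog here, so the statement later deparses as public.users instead of
 * depending on the worker's search_path. The function name and table name are
 * made-up; makeRangeVar comes from nodes/makefuncs.h.
 */
static void
QualifyPublicationRangeVarSketch(void)
{
	RangeVar *rangeVar = makeRangeVar(NULL, "users", -1);

	QualifyPublicationRangeVar(rangeVar);

	/* rangeVar->schemaname is now filled in, e.g. "public" */
}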
|
|
@ -53,6 +53,7 @@
|
|||
#include "common/keywords.h"
|
||||
#include "distributed/citus_nodefuncs.h"
|
||||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/multi_router_planner.h"
|
||||
#include "executor/spi.h"
|
||||
#include "foreign/foreign.h"
|
||||
#include "funcapi.h"
|
||||
|
@ -3723,7 +3724,6 @@ static void
|
|||
get_merge_query_def(Query *query, deparse_context *context)
|
||||
{
|
||||
StringInfo buf = context->buf;
|
||||
RangeTblEntry *targetRte;
|
||||
|
||||
/* Insert the WITH clause if given */
|
||||
get_with_clause(query, context);
|
||||
|
@ -3731,7 +3731,7 @@ get_merge_query_def(Query *query, deparse_context *context)
|
|||
/*
|
||||
* Start the query with MERGE INTO <target>
|
||||
*/
|
||||
targetRte = rt_fetch(query->resultRelation, query->rtable);
|
||||
RangeTblEntry *targetRte = ExtractResultRelationRTE(query);
|
||||
|
||||
if (PRETTY_INDENT(context))
|
||||
{
|
||||
|
@ -3853,6 +3853,15 @@ get_merge_query_def(Query *query, deparse_context *context)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* RETURNING is not supported in MERGE, so it must be NULL, but if PG adds it later,
|
||||
* we might miss it, let's raise an exception to investigate.
|
||||
*/
|
||||
if (unlikely(query->returningList))
|
||||
{
|
||||
elog(ERROR, "Unexpected RETURNING clause in MERGE");
|
||||
}
|
||||
|
||||
ereport(DEBUG1, (errmsg("<Deparsed MERGE query: %s>", buf->data)));
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -9,6 +9,7 @@
|
|||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "distributed/distributed_execution_locks.h"
|
||||
#include "distributed/executor_util.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
|
@ -19,6 +20,259 @@
|
|||
#include "distributed/transaction_management.h"
|
||||
|
||||
|
||||
/*
|
||||
* AcquireExecutorShardLocksForExecution acquires advisory lock on shard IDs
|
||||
* to prevent unsafe concurrent modifications of shards.
|
||||
*
|
||||
* We prevent concurrent modifications of shards in two cases:
|
||||
* 1. Any non-commutative writes to a replicated table
|
||||
* 2. Multi-shard writes that are executed in parallel
|
||||
*
|
||||
* The first case ensures we do not apply updates in different orders on
|
||||
* different replicas (e.g. of a reference table), which could lead the
|
||||
* replicas to diverge.
|
||||
*
|
||||
* The second case prevents deadlocks due to out-of-order execution.
|
||||
*
|
||||
* There are two GUCs that can override the default behaviors.
|
||||
* 'citus.all_modifications_commutative' relaxes locking
|
||||
* that's done for the purpose of keeping replicas consistent.
|
||||
* 'citus.enable_deadlock_prevention' relaxes locking done for
|
||||
* the purpose of avoiding deadlocks between concurrent
|
||||
* multi-shard commands.
|
||||
*
|
||||
* We do not take executor shard locks for utility commands such as
|
||||
* TRUNCATE because the table locks already prevent concurrent access.
|
||||
*/
|
||||
void
|
||||
AcquireExecutorShardLocksForExecution(RowModifyLevel modLevel, List *taskList)
|
||||
{
|
||||
if (modLevel <= ROW_MODIFY_READONLY &&
|
||||
!SelectForUpdateOnReferenceTable(taskList))
|
||||
{
|
||||
/*
|
||||
* Executor locks only apply to DML commands and SELECT FOR UPDATE queries
|
||||
* touching reference tables.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
bool requiresParallelExecutionLocks =
|
||||
!(list_length(taskList) == 1 || ShouldRunTasksSequentially(taskList));
|
||||
|
||||
bool modifiedTableReplicated = ModifiedTableReplicated(taskList);
|
||||
if (!modifiedTableReplicated && !requiresParallelExecutionLocks)
|
||||
{
|
||||
/*
|
||||
* When a distributed query on tables with replication
|
||||
* factor == 1 and command hits only a single shard, we
|
||||
* rely on Postgres to handle the serialization of the
|
||||
* concurrent modifications on the workers.
|
||||
*
|
||||
* For reference tables, even if their placements are replicated
|
||||
* ones (e.g., single node), we acquire the distributed execution
|
||||
* locks to be consistent when new node(s) are added. So, they
|
||||
* do not return at this point.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We first assume that all the remaining modifications are going to
|
||||
* be serialized. So, start with an ExclusiveLock and lower the lock level
|
||||
* as much as possible.
|
||||
*/
|
||||
int lockMode = ExclusiveLock;
|
||||
|
||||
/*
|
||||
* In addition to honouring commutativity rules, we currently only
|
||||
* allow a single multi-shard command on a shard at a time. Otherwise,
|
||||
* concurrent multi-shard commands may take row-level locks on the
|
||||
* shard placements in a different order and create a distributed
|
||||
* deadlock. This applies even when writes are commutative and/or
|
||||
* there is no replication. This can be relaxed via
|
||||
* EnableDeadlockPrevention.
|
||||
*
|
||||
* 1. If citus.all_modifications_commutative is set to true, then all locks
|
||||
* are acquired as RowExclusiveLock.
|
||||
*
|
||||
* 2. If citus.all_modifications_commutative is false, then only the shards
|
||||
* with more than one replica are locked with ExclusiveLock. Otherwise, the
|
||||
* lock is acquired with ShareUpdateExclusiveLock.
|
||||
*
|
||||
* ShareUpdateExclusiveLock conflicts with itself such that only one
|
||||
* multi-shard modification at a time is allowed on a shard. It also conflicts
|
||||
* with ExclusiveLock, which ensures that updates/deletes/upserts are applied
|
||||
* in the same order on all placements. It does not conflict with
|
||||
* RowExclusiveLock, which is normally obtained by single-shard, commutative
|
||||
* writes.
|
||||
*/
|
||||
if (!modifiedTableReplicated && requiresParallelExecutionLocks)
|
||||
{
|
||||
/*
|
||||
* When there is no replication then we only need to prevent
|
||||
* concurrent multi-shard commands on the same shards. This is
|
||||
* because concurrent, parallel commands may modify the same
|
||||
* set of shards, but in different orders. The order of the
|
||||
* accesses might trigger distributed deadlocks that are not
|
||||
* possible to happen on non-distributed systems such as
|
||||
* regular Postgres.
|
||||
*
|
||||
* As an example, assume that we have two queries: query-1 and query-2.
|
||||
* Both queries access shard-1 and shard-2. If query-1 first accesses to
|
||||
* shard-1 then shard-2, and query-2 accesses shard-2 then shard-1, these
|
||||
* two commands might block each other in case they modify the same rows
|
||||
* (e.g., cause distributed deadlocks).
|
||||
*
|
||||
* In either case, ShareUpdateExclusive has the desired effect, since
|
||||
* it conflicts with itself and ExclusiveLock (taken by non-commutative
|
||||
* writes).
|
||||
*
|
||||
* However, some users find this too restrictive, so we allow them to
|
||||
* reduce to a RowExclusiveLock when citus.enable_deadlock_prevention
|
||||
* is disabled, which lets multi-shard modifications run in parallel as
|
||||
* long as they all disable the GUC.
|
||||
*/
|
||||
lockMode =
|
||||
EnableDeadlockPrevention ? ShareUpdateExclusiveLock : RowExclusiveLock;
|
||||
|
||||
if (!IsCoordinator())
|
||||
{
|
||||
/*
|
||||
* We also skip taking a heavy-weight lock when running a multi-shard
|
||||
* commands from workers, since we currently do not prevent concurrency
|
||||
* across workers anyway.
|
||||
*/
|
||||
lockMode = RowExclusiveLock;
|
||||
}
|
||||
}
|
||||
else if (modifiedTableReplicated)
|
||||
{
|
||||
/*
|
||||
* When we are executing distributed queries on replicated tables, our
|
||||
* default behaviour is to prevent any concurrency. This is valid
|
||||
* for when parallel execution is happening or not.
|
||||
*
|
||||
* The reason is that we cannot control the order of the placement accesses
|
||||
* of two distributed queries to the same shards. The order of the accesses
|
||||
* might cause the replicas of the same shard placements to diverge. This is
|
||||
* not possible to happen on non-distributed systems such as regular Postgres.
|
||||
*
|
||||
* As an example, assume that we have two queries: query-1 and query-2.
|
||||
* Both queries only access the placements of shard-1, say p-1 and p-2.
|
||||
*
|
||||
* And, assume that these queries are non-commutative, such as:
|
||||
* query-1: UPDATE table SET b = 1 WHERE key = 1;
|
||||
* query-2: UPDATE table SET b = 2 WHERE key = 1;
|
||||
*
|
||||
* If query-1 accesses to p-1 then p-2, and query-2 accesses
|
||||
* p-2 then p-1, these two commands would leave the p-1 and p-2
|
||||
* diverged (e.g., the values for the column "b" would be different).
|
||||
*
|
||||
* The only exception to this rule is the single shard commutative
|
||||
* modifications, such as INSERTs. In that case, we can allow
|
||||
* concurrency among such backends, hence lowering the lock level
|
||||
* to RowExclusiveLock.
|
||||
*/
|
||||
if (!requiresParallelExecutionLocks && modLevel < ROW_MODIFY_NONCOMMUTATIVE)
|
||||
{
|
||||
lockMode = RowExclusiveLock;
|
||||
}
|
||||
}
|
||||
|
||||
if (AllModificationsCommutative)
|
||||
{
|
||||
/*
|
||||
* The mapping is overridden when all_modifications_commutative is set to true.
|
||||
* In that case, all modifications are treated as commutative, which can be used
|
||||
* to communicate that the application is only generating commutative
|
||||
* UPDATE/DELETE/UPSERT commands and exclusive locks are unnecessary. This
|
||||
* is irrespective of single-shard/multi-shard or replicated tables.
|
||||
*/
|
||||
lockMode = RowExclusiveLock;
|
||||
}
|
||||
|
||||
/* now, iterate on the tasks and acquire the executor locks on the shards */
|
||||
List *anchorShardIntervalList = NIL;
|
||||
List *relationRowLockList = NIL;
|
||||
List *requiresConsistentSnapshotRelationShardList = NIL;
|
||||
|
||||
Task *task = NULL;
|
||||
foreach_ptr(task, taskList)
|
||||
{
|
||||
ShardInterval *anchorShardInterval = LoadShardInterval(task->anchorShardId);
|
||||
anchorShardIntervalList = lappend(anchorShardIntervalList, anchorShardInterval);
|
||||
|
||||
/* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */
|
||||
AcquireExecutorShardLocksForRelationRowLockList(task->relationRowLockList);
|
||||
|
||||
relationRowLockList =
|
||||
list_concat(relationRowLockList,
|
||||
task->relationRowLockList);
|
||||
|
||||
/*
|
||||
* If the task has a subselect, then we may need to lock the shards from which
|
||||
* the query selects as well to prevent the subselects from seeing different
|
||||
* results on different replicas.
|
||||
*/
|
||||
if (RequiresConsistentSnapshot(task))
|
||||
{
|
||||
/*
|
||||
* ExclusiveLock conflicts with all lock types used by modifications
|
||||
* and therefore prevents other modifications from running
|
||||
* concurrently.
|
||||
*/
|
||||
requiresConsistentSnapshotRelationShardList =
|
||||
list_concat(requiresConsistentSnapshotRelationShardList,
|
||||
task->relationShardList);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Acquire the locks in a sorted way to avoid deadlocks due to lock
|
||||
* ordering across concurrent sessions.
|
||||
*/
|
||||
anchorShardIntervalList =
|
||||
SortList(anchorShardIntervalList, CompareShardIntervalsById);
|
||||
|
||||
/*
|
||||
* If we are dealing with a partition we are also taking locks on parent table
|
||||
* to prevent deadlocks on concurrent operations on a partition and its parent.
|
||||
*
|
||||
* Note that this function currently does not acquire any remote locks as that
|
||||
* is necessary to control the concurrency across multiple nodes for replicated
|
||||
* tables. That is because Citus currently does not allow modifications to
|
||||
* partitions from any node other than the coordinator.
|
||||
*/
|
||||
LockParentShardResourceIfPartition(anchorShardIntervalList, lockMode);
|
||||
|
||||
/* Acquire distribution execution locks on the affected shards */
|
||||
SerializeNonCommutativeWrites(anchorShardIntervalList, lockMode);
|
||||
|
||||
if (relationRowLockList != NIL)
|
||||
{
|
||||
/* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */
|
||||
AcquireExecutorShardLocksForRelationRowLockList(relationRowLockList);
|
||||
}
|
||||
|
||||
|
||||
if (requiresConsistentSnapshotRelationShardList != NIL)
|
||||
{
|
||||
/*
|
||||
* If the task has a subselect, then we may need to lock the shards from which
|
||||
* the query selects as well to prevent the subselects from seeing different
|
||||
* results on different replicas.
|
||||
*
|
||||
* ExclusiveLock conflicts with all lock types used by modifications
|
||||
* and therefore prevents other modifications from running
|
||||
* concurrently.
|
||||
*/
|
||||
LockRelationShardResources(requiresConsistentSnapshotRelationShardList,
|
||||
ExclusiveLock);
|
||||
}
|
||||
}
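/*
 * Editor's note: a condensed sketch of the lock mode decision above, not part
 * of this patch. The replication, parallelism and commutativity checks are
 * collapsed into boolean parameters for readability, and the special case for
 * commands originating on worker nodes is left out. The function name is
 * hypothetical; the GUC variables are the ones referenced above.
 */
static LOCKMODE
ExecutorShardLockModeSketch(bool replicated, bool parallelMultiShard,
							bool commutativeSingleShardWrite)
{
	LOCKMODE lockMode = ExclusiveLock;

	if (!replicated && parallelMultiShard)
	{
		/* only guard against concurrent multi-shard commands on the same shards */
		lockMode = EnableDeadlockPrevention ?
				   ShareUpdateExclusiveLock : RowExclusiveLock;
	}
	else if (replicated && !parallelMultiShard && commutativeSingleShardWrite)
	{
		/* single-shard commutative writes on replicated tables may interleave */
		lockMode = RowExclusiveLock;
	}

	if (AllModificationsCommutative)
	{
		/* the GUC overrides everything else */
		lockMode = RowExclusiveLock;
	}

	return lockMode;
}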
|
||||
|
||||
|
||||
/*
|
||||
* RequiresConsistentSnapshot returns true if the given task needs to take
|
||||
* the necessary locks to ensure that a subquery in the modify query
|
||||
|
@ -188,3 +442,27 @@ LockPartitionRelations(Oid relationId, LOCKMODE lockMode)
|
|||
LockRelationOid(partitionRelationId, lockMode);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* LockPartitionsForDistributedPlan ensures commands take locks on all partitions
|
||||
* of a distributed table that appears in the query. We do this primarily out of
|
||||
* consistency with PostgreSQL locking.
|
||||
*/
|
||||
void
|
||||
LockPartitionsForDistributedPlan(DistributedPlan *plan)
|
||||
{
|
||||
if (TaskListModifiesDatabase(plan->modLevel, plan->workerJob->taskList))
|
||||
{
|
||||
Oid targetRelationId = plan->targetRelationId;
|
||||
|
||||
LockPartitionsInRelationList(list_make1_oid(targetRelationId), RowExclusiveLock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock partitions of tables that appear in a SELECT or subquery. In the
|
||||
* DML case this also includes the target relation, but since we already
|
||||
* have a stronger lock this doesn't do any harm.
|
||||
*/
|
||||
LockPartitionsInRelationList(plan->relationIdList, AccessShareLock);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* executor_util_params.c
|
||||
*
|
||||
* Utility functions for dealing with query parameters in the executor.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
|
||||
#include "distributed/executor_util.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
|
||||
/*
|
||||
* ExtractParametersForRemoteExecution extracts parameter types and values from
|
||||
* the given ParamListInfo structure, and fills parameter type and value arrays.
|
||||
* It changes oid of custom types to InvalidOid so that they are the same in workers
|
||||
* and coordinators.
|
||||
*/
|
||||
void
|
||||
ExtractParametersForRemoteExecution(ParamListInfo paramListInfo, Oid **parameterTypes,
|
||||
const char ***parameterValues)
|
||||
{
|
||||
ExtractParametersFromParamList(paramListInfo, parameterTypes,
|
||||
parameterValues, false);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ExtractParametersFromParamList extracts parameter types and values from
|
||||
* the given ParamListInfo structure, and fills parameter type and value arrays.
|
||||
* If useOriginalCustomTypeOids is true, it uses the original oids for custom types.
|
||||
*/
|
||||
void
|
||||
ExtractParametersFromParamList(ParamListInfo paramListInfo,
|
||||
Oid **parameterTypes,
|
||||
const char ***parameterValues, bool
|
||||
useOriginalCustomTypeOids)
|
||||
{
|
||||
int parameterCount = paramListInfo->numParams;
|
||||
|
||||
*parameterTypes = (Oid *) palloc0(parameterCount * sizeof(Oid));
|
||||
*parameterValues = (const char **) palloc0(parameterCount * sizeof(char *));
|
||||
|
||||
/* get parameter types and values */
|
||||
for (int parameterIndex = 0; parameterIndex < parameterCount; parameterIndex++)
|
||||
{
|
||||
ParamExternData *parameterData = ¶mListInfo->params[parameterIndex];
|
||||
Oid typeOutputFunctionId = InvalidOid;
|
||||
bool variableLengthType = false;
|
||||
|
||||
/*
|
||||
* Use 0 for data types where the oid values can be different on
|
||||
* the coordinator and worker nodes. Therefore, the worker nodes can
|
||||
* infer the correct oid.
|
||||
*/
|
||||
if (parameterData->ptype >= FirstNormalObjectId && !useOriginalCustomTypeOids)
|
||||
{
|
||||
(*parameterTypes)[parameterIndex] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
(*parameterTypes)[parameterIndex] = parameterData->ptype;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the parameter is not referenced / used (ptype == 0) and
|
||||
* would otherwise have errored out inside standard_planner(),
|
||||
* don't pass a value to the remote side, and pass text oid to prevent
|
||||
* undetermined data type errors on workers.
|
||||
*/
|
||||
if (parameterData->ptype == 0)
|
||||
{
|
||||
(*parameterValues)[parameterIndex] = NULL;
|
||||
(*parameterTypes)[parameterIndex] = TEXTOID;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the parameter is NULL then we preserve its type, but
|
||||
* don't need to evaluate its value.
|
||||
*/
|
||||
if (parameterData->isnull)
|
||||
{
|
||||
(*parameterValues)[parameterIndex] = NULL;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
getTypeOutputInfo(parameterData->ptype, &typeOutputFunctionId,
|
||||
&variableLengthType);
|
||||
|
||||
(*parameterValues)[parameterIndex] = OidOutputFunctionCall(typeOutputFunctionId,
|
||||
parameterData->value);
|
||||
}
|
||||
}
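/*
 * Editor's note: a hedged usage sketch, not part of this patch. It shows how
 * a caller turns the executor's ParamListInfo into the parallel type/value
 * arrays that accompany the query text sent to a worker. The function name
 * and the paramListInfo argument are assumptions for illustration.
 */
static void
RemoteParameterSketch(ParamListInfo paramListInfo)
{
	Oid *parameterTypes = NULL;
	const char **parameterValues = NULL;

	ExtractParametersForRemoteExecution(paramListInfo, &parameterTypes,
										&parameterValues);

	/* parameterTypes and parameterValues can now be shipped with the query */
}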
|
|
@ -0,0 +1,297 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* executor_util_tasks.c
|
||||
*
|
||||
* Utility functions for dealing with task lists in the executor.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
|
||||
#include "distributed/executor_util.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/shardinterval_utils.h"
|
||||
|
||||
|
||||
/*
|
||||
* TaskListModifiesDatabase is a helper function for DistributedExecutionModifiesDatabase and
|
||||
* DistributedPlanModifiesDatabase.
|
||||
*/
|
||||
bool
|
||||
TaskListModifiesDatabase(RowModifyLevel modLevel, List *taskList)
|
||||
{
|
||||
if (modLevel > ROW_MODIFY_READONLY)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we cannot decide by only checking the row modify level,
|
||||
* we should look closer to the tasks.
|
||||
*/
|
||||
if (list_length(taskList) < 1)
|
||||
{
|
||||
/* is this ever possible? */
|
||||
return false;
|
||||
}
|
||||
|
||||
Task *firstTask = (Task *) linitial(taskList);
|
||||
|
||||
return !ReadOnlyTask(firstTask->taskType);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TaskListRequiresRollback returns true if the distributed
|
||||
* execution should start a CoordinatedTransaction. In other words, if the
|
||||
* function returns true, the execution sends BEGIN; to every connection
|
||||
* involved in the distributed execution.
|
||||
*/
|
||||
bool
|
||||
TaskListRequiresRollback(List *taskList)
|
||||
{
|
||||
int taskCount = list_length(taskList);
|
||||
|
||||
if (taskCount == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
Task *task = (Task *) linitial(taskList);
|
||||
if (task->cannotBeExecutedInTransction)
|
||||
{
|
||||
/* vacuum, create index concurrently etc. */
|
||||
return false;
|
||||
}
|
||||
|
||||
bool selectForUpdate = task->relationRowLockList != NIL;
|
||||
if (selectForUpdate)
|
||||
{
|
||||
/*
|
||||
* Do not check SelectOpensTransactionBlock, always open transaction block
|
||||
* if SELECT FOR UPDATE is executed inside a distributed transaction.
|
||||
*/
|
||||
return IsMultiStatementTransaction();
|
||||
}
|
||||
|
||||
if (ReadOnlyTask(task->taskType))
|
||||
{
|
||||
return SelectOpensTransactionBlock &&
|
||||
IsTransactionBlock();
|
||||
}
|
||||
|
||||
if (IsMultiStatementTransaction())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (list_length(taskList) > 1)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (list_length(task->taskPlacementList) > 1)
|
||||
{
|
||||
/*
|
||||
* Single DML/DDL tasks with replicated tables (including
|
||||
* reference and non-reference tables) should require
|
||||
* BEGIN/COMMIT/ROLLBACK.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
if (task->queryCount > 1)
|
||||
{
|
||||
/*
|
||||
* When there are multiple sequential queries in a task
|
||||
* we need to run those as a transaction.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TaskListRequires2PC determines whether the given task list requires 2PC.
|
||||
*/
|
||||
bool
|
||||
TaskListRequires2PC(List *taskList)
|
||||
{
|
||||
if (taskList == NIL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
Task *task = (Task *) linitial(taskList);
|
||||
if (ReadOnlyTask(task->taskType))
|
||||
{
|
||||
/* we do not trigger 2PC for ReadOnly queries */
|
||||
return false;
|
||||
}
|
||||
|
||||
bool singleTask = list_length(taskList) == 1;
|
||||
if (singleTask && list_length(task->taskPlacementList) == 1)
|
||||
{
|
||||
/* we do not trigger 2PC for modifications that are:
|
||||
* - single task
|
||||
* - single placement
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Otherwise, all modifications are done via 2PC. This includes:
|
||||
* - Multi-shard commands irrespective of the replication factor
|
||||
* - Single-shard commands that are targeting more than one replica
|
||||
*/
|
||||
return true;
|
||||
}
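/*
 * Editor's note: a hedged sketch of a call site, not taken from this patch.
 * The adaptive executor consults these helpers to decide whether to open a
 * coordinated transaction and whether to escalate it to two-phase commit.
 * UseCoordinatedTransaction and Use2PCForCoordinatedTransaction come from
 * distributed/transaction_management.h, which this sketch assumes is included.
 */
static void
TransactionDecisionSketch(List *taskList)
{
	if (TaskListRequiresRollback(taskList))
	{
		/* send BEGIN to every connection participating in the execution */
		UseCoordinatedTransaction();
	}

	if (TaskListRequires2PC(taskList))
	{
		/* commit through PREPARE TRANSACTION / COMMIT PREPARED */
		Use2PCForCoordinatedTransaction();
	}
}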
|
||||
|
||||
|
||||
/*
|
||||
* TaskListCannotBeExecutedInTransaction returns true if any of the
|
||||
* tasks in the input cannot be executed in a transaction. These are
|
||||
* tasks like VACUUM or CREATE INDEX CONCURRENTLY etc.
|
||||
*/
|
||||
bool
|
||||
TaskListCannotBeExecutedInTransaction(List *taskList)
|
||||
{
|
||||
Task *task = NULL;
|
||||
foreach_ptr(task, taskList)
|
||||
{
|
||||
if (task->cannotBeExecutedInTransction)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SelectForUpdateOnReferenceTable returns true if the given task list
|
||||
* contains a FOR UPDATE clause that locks any reference tables.
|
||||
*/
|
||||
bool
|
||||
SelectForUpdateOnReferenceTable(List *taskList)
|
||||
{
|
||||
if (list_length(taskList) != 1)
|
||||
{
|
||||
/* we currently do not support SELECT FOR UPDATE on multi task queries */
|
||||
return false;
|
||||
}
|
||||
|
||||
Task *task = (Task *) linitial(taskList);
|
||||
RelationRowLock *relationRowLock = NULL;
|
||||
foreach_ptr(relationRowLock, task->relationRowLockList)
|
||||
{
|
||||
Oid relationId = relationRowLock->relationId;
|
||||
|
||||
if (IsCitusTableType(relationId, REFERENCE_TABLE))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReadOnlyTask returns true if the given task type does a read-only operation
|
||||
* on the database.
|
||||
*/
|
||||
bool
|
||||
ReadOnlyTask(TaskType taskType)
|
||||
{
|
||||
switch (taskType)
|
||||
{
|
||||
case READ_TASK:
|
||||
case MAP_OUTPUT_FETCH_TASK:
|
||||
case MAP_TASK:
|
||||
case MERGE_TASK:
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ModifiedTableReplicated iterates on the task list and returns true
|
||||
* if any task's anchor shard belongs to a replicated table. We qualify
|
||||
* replicated tables as any reference table or any distributed table with
|
||||
* replication factor > 1.
|
||||
*/
|
||||
bool
|
||||
ModifiedTableReplicated(List *taskList)
|
||||
{
|
||||
Task *task = NULL;
|
||||
foreach_ptr(task, taskList)
|
||||
{
|
||||
int64 shardId = task->anchorShardId;
|
||||
|
||||
if (shardId == INVALID_SHARD_ID)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ReferenceTableShardId(shardId))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
Oid relationId = RelationIdForShard(shardId);
|
||||
if (!SingleReplicatedTable(relationId))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ShouldRunTasksSequentially returns true if each of the individual tasks
|
||||
* should be executed one by one. Note that this is different than
|
||||
* MultiShardConnectionType == SEQUENTIAL_CONNECTION case. In that case,
|
||||
* running the tasks across the nodes in parallel is acceptable and implemented
|
||||
* in that way.
|
||||
*
|
||||
* However, the executions that are qualified here would perform poorly if the
|
||||
* tasks across the workers are executed in parallel. We currently qualify only
|
||||
* one class of distributed queries here, multi-row INSERTs. If we do not enforce
|
||||
* true sequential execution, concurrent multi-row upserts could easily form
|
||||
* a distributed deadlock when the upserts touch the same rows.
|
||||
*/
|
||||
bool
|
||||
ShouldRunTasksSequentially(List *taskList)
|
||||
{
|
||||
if (list_length(taskList) < 2)
|
||||
{
|
||||
/* single task plans are already qualified as sequential by definition */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* all the tasks are the same, so we only need to look at one */
|
||||
Task *initialTask = (Task *) linitial(taskList);
|
||||
if (initialTask->rowValuesLists != NIL)
|
||||
{
|
||||
/* found a multi-row INSERT */
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
|
@ -0,0 +1,129 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* executor_util_tuples.c
|
||||
*
|
||||
* Utility functions for handling tuples during remote execution.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
|
||||
#include "distributed/executor_util.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
|
||||
/*
|
||||
* TupleDescGetAttBinaryInMetadata - Build an AttInMetadata structure based on
|
||||
* the supplied TupleDesc. AttInMetadata can be used in conjunction with
|
||||
* fmStringInfos containing binary encoded types to produce a properly formed
|
||||
* tuple.
|
||||
*
|
||||
* NOTE: This function is a copy of the PG function TupleDescGetAttInMetadata,
|
||||
* except that it uses getTypeBinaryInputInfo instead of getTypeInputInfo.
|
||||
*/
|
||||
AttInMetadata *
|
||||
TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc)
|
||||
{
|
||||
int natts = tupdesc->natts;
|
||||
int i;
|
||||
Oid atttypeid;
|
||||
Oid attinfuncid;
|
||||
|
||||
AttInMetadata *attinmeta = (AttInMetadata *) palloc(sizeof(AttInMetadata));
|
||||
|
||||
/* "Bless" the tupledesc so that we can make rowtype datums with it */
|
||||
attinmeta->tupdesc = BlessTupleDesc(tupdesc);
|
||||
|
||||
/*
|
||||
* Gather info needed later to call the "in" function for each attribute
|
||||
*/
|
||||
FmgrInfo *attinfuncinfo = (FmgrInfo *) palloc0(natts * sizeof(FmgrInfo));
|
||||
Oid *attioparams = (Oid *) palloc0(natts * sizeof(Oid));
|
||||
int32 *atttypmods = (int32 *) palloc0(natts * sizeof(int32));
|
||||
|
||||
for (i = 0; i < natts; i++)
|
||||
{
|
||||
Form_pg_attribute att = TupleDescAttr(tupdesc, i);
|
||||
|
||||
/* Ignore dropped attributes */
|
||||
if (!att->attisdropped)
|
||||
{
|
||||
atttypeid = att->atttypid;
|
||||
getTypeBinaryInputInfo(atttypeid, &attinfuncid, &attioparams[i]);
|
||||
fmgr_info(attinfuncid, &attinfuncinfo[i]);
|
||||
atttypmods[i] = att->atttypmod;
|
||||
}
|
||||
}
|
||||
attinmeta->attinfuncs = attinfuncinfo;
|
||||
attinmeta->attioparams = attioparams;
|
||||
attinmeta->atttypmods = atttypmods;
|
||||
|
||||
return attinmeta;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* BuildTupleFromBytes - build a HeapTuple given user data in binary form.
|
||||
* values is an array of StringInfos, one for each attribute of the return
|
||||
* tuple. A NULL StringInfo pointer indicates we want to create a NULL field.
|
||||
*
|
||||
* NOTE: This function is a copy of the PG function BuildTupleFromCStrings,
|
||||
* except that it uses ReceiveFunctionCall instead of InputFunctionCall.
|
||||
*/
|
||||
HeapTuple
|
||||
BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values)
|
||||
{
|
||||
TupleDesc tupdesc = attinmeta->tupdesc;
|
||||
int natts = tupdesc->natts;
|
||||
int i;
|
||||
|
||||
Datum *dvalues = (Datum *) palloc(natts * sizeof(Datum));
|
||||
bool *nulls = (bool *) palloc(natts * sizeof(bool));
|
||||
|
||||
/*
|
||||
* Call the "in" function for each non-dropped attribute, even for nulls,
|
||||
* to support domains.
|
||||
*/
|
||||
for (i = 0; i < natts; i++)
|
||||
{
|
||||
if (!TupleDescAttr(tupdesc, i)->attisdropped)
|
||||
{
|
||||
/* Non-dropped attributes */
|
||||
dvalues[i] = ReceiveFunctionCall(&attinmeta->attinfuncs[i],
|
||||
values[i],
|
||||
attinmeta->attioparams[i],
|
||||
attinmeta->atttypmods[i]);
|
||||
if (values[i] != NULL)
|
||||
{
|
||||
nulls[i] = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
nulls[i] = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Handle dropped attributes by setting to NULL */
|
||||
dvalues[i] = (Datum) 0;
|
||||
nulls[i] = true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Form a tuple
|
||||
*/
|
||||
HeapTuple tuple = heap_form_tuple(tupdesc, dvalues, nulls);
|
||||
|
||||
/*
|
||||
* Release locally palloc'd space. XXX would probably be good to pfree
|
||||
* values of pass-by-reference datums, as well.
|
||||
*/
|
||||
pfree(dvalues);
|
||||
pfree(nulls);
|
||||
|
||||
return tuple;
|
||||
}
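/*
 * Editor's note: a hedged usage sketch, not part of this patch. The two
 * helpers above are used together: build the binary-input metadata once per
 * tuple descriptor, then form a heap tuple from the binary column values
 * received from a worker. The function name and arguments are illustrative.
 */
static HeapTuple
BinaryTupleSketch(TupleDesc tupleDescriptor, fmStringInfo *columnValues)
{
	AttInMetadata *attInMetadata = TupleDescGetAttBinaryInMetadata(tupleDescriptor);

	return BuildTupleFromBytes(attInMetadata, columnValues);
}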
|
|
@ -409,11 +409,13 @@ ExecutePlanIntoColocatedIntermediateResults(Oid targetRelationId,
|
|||
columnNameList);
|
||||
|
||||
/* set up a DestReceiver that copies into the intermediate table */
|
||||
const bool publishableData = true;
|
||||
CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId,
|
||||
columnNameList,
|
||||
partitionColumnIndex,
|
||||
executorState,
|
||||
intermediateResultIdPrefix);
|
||||
intermediateResultIdPrefix,
|
||||
publishableData);
|
||||
|
||||
ExecutePlanIntoDestReceiver(selectPlan, paramListInfo, (DestReceiver *) copyDest);
|
||||
|
||||
|
@ -443,10 +445,12 @@ ExecutePlanIntoRelation(Oid targetRelationId, List *insertTargetList,
|
|||
columnNameList);
|
||||
|
||||
/* set up a DestReceiver that copies into the distributed table */
|
||||
const bool publishableData = true;
|
||||
CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId,
|
||||
columnNameList,
|
||||
partitionColumnIndex,
|
||||
executorState, NULL);
|
||||
executorState, NULL,
|
||||
publishableData);
|
||||
|
||||
ExecutePlanIntoDestReceiver(selectPlan, paramListInfo, (DestReceiver *) copyDest);
|
||||
|
||||
|
|
|
@ -90,6 +90,7 @@
|
|||
#include "distributed/local_executor.h"
|
||||
#include "distributed/local_plan_cache.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/executor_util.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/multi_executor.h"
|
||||
#include "distributed/multi_server_executor.h"
|
||||
|
|
|
@ -802,6 +802,11 @@ GetObjectTypeString(ObjectType objType)
|
|||
return "function";
|
||||
}
|
||||
|
||||
case OBJECT_PUBLICATION:
|
||||
{
|
||||
return "publication";
|
||||
}
|
||||
|
||||
case OBJECT_SCHEMA:
|
||||
{
|
||||
return "schema";
|
||||
|
|
|
@ -132,6 +132,7 @@ typedef struct ViewDependencyNode
|
|||
static List * GetRelationSequenceDependencyList(Oid relationId);
|
||||
static List * GetRelationFunctionDependencyList(Oid relationId);
|
||||
static List * GetRelationTriggerFunctionDependencyList(Oid relationId);
|
||||
static List * GetPublicationRelationsDependencyList(Oid relationId);
|
||||
static List * GetRelationStatsSchemaDependencyList(Oid relationId);
|
||||
static List * GetRelationIndicesDependencyList(Oid relationId);
|
||||
static DependencyDefinition * CreateObjectAddressDependencyDef(Oid classId, Oid objectId);
|
||||
|
@ -722,6 +723,11 @@ SupportedDependencyByCitus(const ObjectAddress *address)
|
|||
return true;
|
||||
}
|
||||
|
||||
case OCLASS_PUBLICATION:
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
case OCLASS_TSCONFIG:
|
||||
{
|
||||
return true;
|
||||
|
@ -1656,6 +1662,36 @@ ExpandCitusSupportedTypes(ObjectAddressCollector *collector, ObjectAddress targe
|
|||
List *ruleRefDepList = GetViewRuleReferenceDependencyList(relationId);
|
||||
result = list_concat(result, ruleRefDepList);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case PublicationRelationId:
|
||||
{
|
||||
Oid publicationId = target.objectId;
|
||||
|
||||
/*
|
||||
* Publications do not depend directly on relations, because dropping
|
||||
* the relation will only remove it from the publications. However,
|
||||
* we add a dependency to ensure the relation is created first when
|
||||
* adding a node.
|
||||
*/
|
||||
List *relationDependencyList =
|
||||
GetPublicationRelationsDependencyList(publicationId);
|
||||
result = list_concat(result, relationDependencyList);
|
||||
|
||||
/*
|
||||
* As of PostgreSQL 15, the same applies to schemas.
|
||||
*/
|
||||
#if PG_VERSION_NUM >= PG_VERSION_15
|
||||
List *schemaIdList =
|
||||
GetPublicationSchemas(publicationId);
|
||||
List *schemaDependencyList =
|
||||
CreateObjectAddressDependencyDefList(NamespaceRelationId, schemaIdList);
|
||||
result = list_concat(result, schemaDependencyList);
|
||||
#endif
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
|
@ -1834,7 +1870,7 @@ static List *
|
|||
GetRelationSequenceDependencyList(Oid relationId)
|
||||
{
|
||||
List *seqInfoList = NIL;
|
||||
GetDependentSequencesWithRelation(relationId, &seqInfoList, 0);
|
||||
GetDependentSequencesWithRelation(relationId, &seqInfoList, 0, DEPENDENCY_AUTO);
|
||||
|
||||
List *seqIdList = NIL;
|
||||
SequenceInfo *seqInfo = NULL;
|
||||
|
@ -1923,6 +1959,33 @@ GetRelationTriggerFunctionDependencyList(Oid relationId)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetPublicationRelationsDependencyList creates a list of ObjectAddressDependencies for
|
||||
* a publication on the Citus relations it contains. This helps make sure we distribute
|
||||
* Citus tables before local tables.
|
||||
*/
|
||||
static List *
|
||||
GetPublicationRelationsDependencyList(Oid publicationId)
|
||||
{
|
||||
List *allRelationIds = GetPublicationRelations(publicationId, PUBLICATION_PART_ROOT);
|
||||
List *citusRelationIds = NIL;
|
||||
|
||||
Oid relationId = InvalidOid;
|
||||
|
||||
foreach_oid(relationId, allRelationIds)
|
||||
{
|
||||
if (!IsCitusTable(relationId))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
citusRelationIds = lappend_oid(citusRelationIds, relationId);
|
||||
}
|
||||
|
||||
return CreateObjectAddressDependencyDefList(RelationRelationId, citusRelationIds);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetTypeConstraintDependencyDefinition creates a list of constraint dependency
|
||||
* definitions for a given type
|
||||
|
|
|
@ -311,7 +311,7 @@ static void InvalidateDistTableCache(void);
|
|||
static void InvalidateDistObjectCache(void);
|
||||
static bool InitializeTableCacheEntry(int64 shardId, bool missingOk);
|
||||
static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
|
||||
CitusTableType tableType);
|
||||
uint32 colocationId, CitusTableType tableType);
|
||||
static bool RefreshTableCacheEntryIfInvalid(ShardIdCacheEntry *shardEntry, bool
|
||||
missingOk);
|
||||
|
||||
|
@ -450,7 +450,36 @@ bool
|
|||
IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tableType)
|
||||
{
|
||||
return IsCitusTableTypeInternal(tableEntry->partitionMethod,
|
||||
tableEntry->replicationModel, tableType);
|
||||
tableEntry->replicationModel,
|
||||
tableEntry->colocationId, tableType);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HasDistributionKey returns true if the given Citus table has a
|
||||
* distribution key.
|
||||
*/
|
||||
bool
|
||||
HasDistributionKey(Oid relationId)
|
||||
{
|
||||
CitusTableCacheEntry *tableEntry = LookupCitusTableCacheEntry(relationId);
|
||||
if (tableEntry == NULL)
|
||||
{
|
||||
ereport(ERROR, (errmsg("relation with oid %u is not a Citus table", relationId)));
|
||||
}
|
||||
|
||||
return HasDistributionKeyCacheEntry(tableEntry);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HasDistributionKeyCacheEntry returns true if the given cache entry identifies a Citus
|
||||
* table that has a distribution key.
|
||||
*/
|
||||
bool
|
||||
HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry)
|
||||
{
|
||||
return tableEntry->partitionMethod != DISTRIBUTE_BY_NONE;
|
||||
}
|
||||
|
||||
|
||||
|
@ -460,7 +489,7 @@ IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tabl
|
|||
*/
|
||||
static bool
|
||||
IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
|
||||
CitusTableType tableType)
|
||||
uint32 colocationId, CitusTableType tableType)
|
||||
{
|
||||
switch (tableType)
|
||||
{
|
||||
|
@ -501,12 +530,8 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
|
|||
case CITUS_LOCAL_TABLE:
|
||||
{
|
||||
return partitionMethod == DISTRIBUTE_BY_NONE &&
|
||||
replicationModel != REPLICATION_MODEL_2PC;
|
||||
}
|
||||
|
||||
case CITUS_TABLE_WITH_NO_DIST_KEY:
|
||||
{
|
||||
return partitionMethod == DISTRIBUTE_BY_NONE;
|
||||
replicationModel != REPLICATION_MODEL_2PC &&
|
||||
colocationId == INVALID_COLOCATION_ID;
|
||||
}
|
||||
|
||||
case ANY_CITUS_TABLE_TYPE:
|
||||
|
@ -529,33 +554,21 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
|
|||
char *
|
||||
GetTableTypeName(Oid tableId)
|
||||
{
|
||||
bool regularTable = false;
|
||||
char partitionMethod = ' ';
|
||||
char replicationModel = ' ';
|
||||
if (IsCitusTable(tableId))
|
||||
{
|
||||
CitusTableCacheEntry *referencingCacheEntry = GetCitusTableCacheEntry(tableId);
|
||||
partitionMethod = referencingCacheEntry->partitionMethod;
|
||||
replicationModel = referencingCacheEntry->replicationModel;
|
||||
}
|
||||
else
|
||||
{
|
||||
regularTable = true;
|
||||
}
|
||||
|
||||
if (regularTable)
|
||||
if (!IsCitusTable(tableId))
|
||||
{
|
||||
return "regular table";
|
||||
}
|
||||
else if (partitionMethod == 'h')
|
||||
|
||||
CitusTableCacheEntry *tableCacheEntry = GetCitusTableCacheEntry(tableId);
|
||||
if (IsCitusTableTypeCacheEntry(tableCacheEntry, HASH_DISTRIBUTED))
|
||||
{
|
||||
return "distributed table";
|
||||
}
|
||||
else if (partitionMethod == 'n' && replicationModel == 't')
|
||||
else if (IsCitusTableTypeCacheEntry(tableCacheEntry, REFERENCE_TABLE))
|
||||
{
|
||||
return "reference table";
|
||||
}
|
||||
else if (partitionMethod == 'n' && replicationModel != 't')
|
||||
else if (IsCitusTableTypeCacheEntry(tableCacheEntry, CITUS_LOCAL_TABLE))
|
||||
{
|
||||
return "citus local table";
|
||||
}
|
||||
|
@ -577,6 +590,18 @@ IsCitusTable(Oid relationId)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsCitusTableRangeVar returns whether the table named in the given
|
||||
* rangeVar is a Citus table.
|
||||
*/
|
||||
bool
|
||||
IsCitusTableRangeVar(RangeVar *rangeVar, LOCKMODE lockMode, bool missingOK)
|
||||
{
|
||||
Oid relationId = RangeVarGetRelid(rangeVar, lockMode, missingOK);
|
||||
return IsCitusTable(relationId);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsCitusTableViaCatalog returns whether the given relation is a
|
||||
* distributed table or not.
|
||||
|
@ -765,14 +790,28 @@ PgDistPartitionTupleViaCatalog(Oid relationId)
|
|||
|
||||
|
||||
/*
|
||||
* IsCitusLocalTableByDistParams returns true if given partitionMethod and
|
||||
* replicationModel would identify a citus local table.
|
||||
* IsReferenceTableByDistParams returns true if given partitionMethod and
|
||||
* replicationModel would identify a reference table.
|
||||
*/
|
||||
bool
|
||||
IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel)
|
||||
IsReferenceTableByDistParams(char partitionMethod, char replicationModel)
|
||||
{
|
||||
return partitionMethod == DISTRIBUTE_BY_NONE &&
|
||||
replicationModel != REPLICATION_MODEL_2PC;
|
||||
replicationModel == REPLICATION_MODEL_2PC;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsCitusLocalTableByDistParams returns true if given partitionMethod,
|
||||
* replicationModel and colocationId would identify a citus local table.
|
||||
*/
|
||||
bool
|
||||
IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel,
|
||||
uint32 colocationId)
|
||||
{
|
||||
return partitionMethod == DISTRIBUTE_BY_NONE &&
|
||||
replicationModel != REPLICATION_MODEL_2PC &&
|
||||
colocationId == INVALID_COLOCATION_ID;
|
||||
}
|
||||
|
||||
|
||||
|
@ -4837,11 +4876,14 @@ CitusTableTypeIdList(CitusTableType citusTableType)
|
|||
|
||||
Datum partMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1];
|
||||
Datum replicationModelDatum = datumArray[Anum_pg_dist_partition_repmodel - 1];
|
||||
Datum colocationIdDatum = datumArray[Anum_pg_dist_partition_colocationid - 1];
|
||||
|
||||
Oid partitionMethod = DatumGetChar(partMethodDatum);
|
||||
Oid replicationModel = DatumGetChar(replicationModelDatum);
|
||||
uint32 colocationId = DatumGetUInt32(colocationIdDatum);
|
||||
|
||||
if (IsCitusTableTypeInternal(partitionMethod, replicationModel, citusTableType))
|
||||
if (IsCitusTableTypeInternal(partitionMethod, replicationModel, colocationId,
|
||||
citusTableType))
|
||||
{
|
||||
Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1];
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -985,7 +985,7 @@ AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval)
|
|||
|
||||
appendStringInfo(selectQuery, "SELECT " UINT64_FORMAT " AS shard_id, ", shardId);
|
||||
appendStringInfo(selectQuery, "%s AS shard_name, ", quotedShardName);
|
||||
appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName);
|
||||
appendStringInfo(selectQuery, PG_TOTAL_RELATION_SIZE_FUNCTION, quotedShardName);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1670,6 +1670,48 @@ TupleToGroupShardPlacement(TupleDesc tupleDescriptor, HeapTuple heapTuple)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* LookupTaskPlacementHostAndPort sets the nodename and nodeport for the given task placement
|
||||
* with a lookup.
|
||||
*/
|
||||
void
|
||||
LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName,
|
||||
int *nodePort)
|
||||
{
|
||||
if (IsDummyPlacement(taskPlacement))
|
||||
{
|
||||
/*
|
||||
* If we create a dummy placement for the local node, it is possible
|
||||
* that the entry doesn't exist in pg_dist_node, hence a lookup will fail.
|
||||
* In that case we want to use the dummy placements values.
|
||||
*/
|
||||
*nodeName = taskPlacement->nodeName;
|
||||
*nodePort = taskPlacement->nodePort;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* We want to lookup the node information again since it is possible that
|
||||
* there were changes in pg_dist_node and we will get those invalidations
|
||||
* in LookupNodeForGroup.
|
||||
*/
|
||||
WorkerNode *workerNode = LookupNodeForGroup(taskPlacement->groupId);
|
||||
*nodeName = workerNode->workerName;
|
||||
*nodePort = workerNode->workerPort;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsDummyPlacement returns true if the given placement is a dummy placement.
|
||||
*/
|
||||
bool
|
||||
IsDummyPlacement(ShardPlacement *taskPlacement)
|
||||
{
|
||||
return taskPlacement->nodeId == LOCAL_NODE_ID;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* InsertShardRow opens the shard system catalog, and inserts a new row with the
|
||||
* given values into that system catalog. Note that we allow the user to pass in
|
||||
|
|
File diff suppressed because it is too large
|
@ -425,6 +425,7 @@ ErrorIfCurrentUserCanNotDistributeObject(char *textType, ObjectType type,
|
|||
case OBJECT_COLLATION:
|
||||
case OBJECT_VIEW:
|
||||
case OBJECT_ROLE:
|
||||
case OBJECT_PUBLICATION:
|
||||
{
|
||||
check_object_ownership(userId, type, *addr, node, *relation);
|
||||
break;
|
||||
|
|
|
@@ -215,6 +215,7 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
{
    bool colocatedShard = true;
    List *insertedShardPlacements = NIL;
    List *insertedShardIds = NIL;

    /* make sure that tables are hash partitioned */
    CheckHashPartitionedTable(targetRelationId);

@@ -254,7 +255,9 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
    foreach_ptr(sourceShardInterval, sourceShardIntervalList)
    {
        uint64 sourceShardId = sourceShardInterval->shardId;
        uint64 newShardId = GetNextShardId();
        uint64 *newShardIdPtr = (uint64 *) palloc0(sizeof(uint64));
        *newShardIdPtr = GetNextShardId();
        insertedShardIds = lappend(insertedShardIds, newShardIdPtr);

        int32 shardMinValue = DatumGetInt32(sourceShardInterval->minValue);
        int32 shardMaxValue = DatumGetInt32(sourceShardInterval->maxValue);

@@ -263,7 +266,7 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
        List *sourceShardPlacementList = ShardPlacementListSortedByWorker(
            sourceShardId);

        InsertShardRow(targetRelationId, newShardId, targetShardStorageType,
        InsertShardRow(targetRelationId, *newShardIdPtr, targetShardStorageType,
                       shardMinValueText, shardMaxValueText);

        ShardPlacement *sourcePlacement = NULL;

@@ -272,21 +275,26 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
            int32 groupId = sourcePlacement->groupId;
            const uint64 shardSize = 0;

            /*
             * Optimistically add shard placement row the pg_dist_shard_placement, in case
             * of any error it will be roll-backed.
             */
            uint64 shardPlacementId = InsertShardPlacementRow(newShardId,
            InsertShardPlacementRow(*newShardIdPtr,
                                    INVALID_PLACEMENT_ID,
                                    shardSize,
                                    groupId);

            ShardPlacement *shardPlacement = LoadShardPlacement(newShardId,
                                                                shardPlacementId);
            insertedShardPlacements = lappend(insertedShardPlacements, shardPlacement);
        }
    }

    /*
     * load shard placements for the shard at once after all placement insertions
     * finished. That prevents MetadataCache from rebuilding unnecessarily after
     * each placement insertion.
     */
    uint64 *shardIdPtr;
    foreach_ptr(shardIdPtr, insertedShardIds)
    {
        List *placementsForShard = ShardPlacementList(*shardIdPtr);
        insertedShardPlacements = list_concat(insertedShardPlacements,
                                              placementsForShard);
    }

    CreateShardsOnWorkers(targetRelationId, insertedShardPlacements,
                          useExclusiveConnections, colocatedShard);
}
@@ -461,10 +461,7 @@ ResolveRelationId(text *relationName, bool missingOk)
 * definition, optional column storage and statistics definitions, and index
 * constraint and trigger definitions.
 * When IncludeIdentities is NO_IDENTITY, the function does not include identity column
 * specifications. When it's INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, the function
 * uses sequences and set them as default values for identity columns by using exactly
 * the same approach with worker_nextval('sequence') & nextval('sequence') logic
 * desribed above. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses.
 * specifications. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses.
 */
List *
GetFullTableCreationCommands(Oid relationId,

@@ -500,6 +497,15 @@ GetFullTableCreationCommands(Oid relationId,
        tableDDLEventList = lappend(tableDDLEventList,
                                    truncateTriggerCommand);
    }

    /*
     * For identity column sequences, we only need to modify
     * their min/max values to produce unique values on the worker nodes.
     */
    List *identitySequenceDependencyCommandList =
        IdentitySequenceDependencyCommandList(relationId);
    tableDDLEventList = list_concat(tableDDLEventList,
                                    identitySequenceDependencyCommandList);
}

tableDDLEventList = list_concat(tableDDLEventList, postLoadCreationCommandList);
@@ -190,6 +190,19 @@ typedef struct WorkerShardStatistics
    HTAB *statistics;
} WorkerShardStatistics;

/* ShardMoveDependencyHashEntry contains the taskId which any new shard move task within the corresponding colocation group must take a dependency on */
typedef struct ShardMoveDependencyInfo
{
    int64 key;
    int64 taskId;
} ShardMoveDependencyInfo;

typedef struct ShardMoveDependencies
{
    HTAB *colocationDependencies;
    HTAB *nodeDependencies;
} ShardMoveDependencies;

char *VariablesToBePassedToNewConnections = NULL;

/* static declarations for main logic */

@@ -475,6 +488,7 @@ GetRebalanceSteps(RebalanceOptions *options)
    /* sort the lists to make the function more deterministic */
    List *activeWorkerList = SortedActiveWorkers();
    List *activeShardPlacementListList = NIL;
    List *unbalancedShards = NIL;

    Oid relationId = InvalidOid;
    foreach_oid(relationId, options->relationIdList)

@@ -490,8 +504,29 @@ GetRebalanceSteps(RebalanceOptions *options)
                                                   shardPlacementList, options->workerNode);
        }

        activeShardPlacementListList =
            lappend(activeShardPlacementListList, activeShardPlacementListForRelation);
        if (list_length(activeShardPlacementListForRelation) >= list_length(
                activeWorkerList))
        {
            activeShardPlacementListList = lappend(activeShardPlacementListList,
                                                   activeShardPlacementListForRelation);
        }
        else
        {
            /*
             * If the number of shard groups are less than the number of worker nodes,
             * at least one of the worker nodes will remain empty. For such cases,
             * we consider those shard groups as a colocation group and try to
             * distribute them across the cluster.
             */
            unbalancedShards = list_concat(unbalancedShards,
                                           activeShardPlacementListForRelation);
        }
    }

    if (list_length(unbalancedShards) > 0)
    {
        activeShardPlacementListList = lappend(activeShardPlacementListList,
                                               unbalancedShards);
    }

    if (options->threshold < options->rebalanceStrategy->minimumThreshold)
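The hunk above decides, per relation, whether its shard group is balanced on its own or pooled with other small groups. A minimal illustration of that rule follows; it is not Citus code and the function name is invented for the example.

static bool
RelationFormsItsOwnBalancingGroup(int shardPlacementCount, int workerCount)
{
    /*
     * A relation whose shard group has at least as many placements as there
     * are workers is balanced as its own colocation group; smaller groups are
     * pooled into "unbalancedShards" so they can still be spread over
     * otherwise-empty workers.
     */
    return shardPlacementCount >= workerCount;
}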
@@ -1796,10 +1831,10 @@ static void
RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid)
{
    char transferMode = LookupShardTransferMode(shardReplicationModeOid);
    EnsureReferenceTablesExistOnAllNodesExtended(transferMode);

    if (list_length(options->relationIdList) == 0)
    {
        EnsureReferenceTablesExistOnAllNodesExtended(transferMode);
        return;
    }

@@ -1814,6 +1849,25 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid)

    List *placementUpdateList = GetRebalanceSteps(options);

    if (transferMode == TRANSFER_MODE_AUTOMATIC)
    {
        /*
         * If the shard transfer mode is set to auto, we should check beforehand
         * if we are able to use logical replication to transfer shards or not.
         * We throw an error if any of the tables do not have a replica identity, which
         * is required for logical replication to replicate UPDATE and DELETE commands.
         */
        PlacementUpdateEvent *placementUpdate = NULL;
        foreach_ptr(placementUpdate, placementUpdateList)
        {
            Oid relationId = RelationIdForShard(placementUpdate->shardId);
            List *colocatedTableList = ColocatedTableList(relationId);
            VerifyTablesHaveReplicaIdentity(colocatedTableList);
        }
    }

    EnsureReferenceTablesExistOnAllNodesExtended(transferMode);

    if (list_length(placementUpdateList) == 0)
    {
        return;
@@ -1857,6 +1911,137 @@ ErrorOnConcurrentRebalance(RebalanceOptions *options)
}


/*
 * GetColocationId function returns the colocationId of the shard in a PlacementUpdateEvent.
 */
static int64
GetColocationId(PlacementUpdateEvent *move)
{
    ShardInterval *shardInterval = LoadShardInterval(move->shardId);

    CitusTableCacheEntry *citusTableCacheEntry = GetCitusTableCacheEntry(
        shardInterval->relationId);

    return citusTableCacheEntry->colocationId;
}


/*
 * InitializeShardMoveDependencies function creates the hash maps that we use to track
 * the latest moves so that subsequent moves with the same properties must take a dependency
 * on them. There are two hash maps. One is for tracking the latest move scheduled in a
 * given colocation group and the other one is for tracking the latest move which involves
 * a given node either as its source node or its target node.
 */
static ShardMoveDependencies
InitializeShardMoveDependencies()
{
    ShardMoveDependencies shardMoveDependencies;
    shardMoveDependencies.colocationDependencies = CreateSimpleHashWithNameAndSize(int64,
                                                                                   ShardMoveDependencyInfo,
                                                                                   "colocationDependencyHashMap",
                                                                                   6);
    shardMoveDependencies.nodeDependencies = CreateSimpleHashWithNameAndSize(int64,
                                                                             ShardMoveDependencyInfo,
                                                                             "nodeDependencyHashMap",
                                                                             6);

    return shardMoveDependencies;
}


/*
 * GenerateTaskMoveDependencyList creates and returns a List of taskIds that
 * the move must take a dependency on.
 */
static int64 *
GenerateTaskMoveDependencyList(PlacementUpdateEvent *move, int64 colocationId,
                               ShardMoveDependencies shardMoveDependencies, int *nDepends)
{
    HTAB *dependsList = CreateSimpleHashSetWithNameAndSize(int64,
                                                           "shardMoveDependencyList", 0);

    bool found;

    /* Check if there exists a move in the same colocation group scheduled earlier. */
    ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search(
        shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER, &found);

    if (found)
    {
        hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
    }

    /* Check if there exists a move scheduled earlier whose source or target node
     * overlaps with the current move's source node. */
    shardMoveDependencyInfo = hash_search(
        shardMoveDependencies.nodeDependencies, &move->sourceNode->nodeId, HASH_ENTER,
        &found);

    if (found)
    {
        hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
    }

    /* Check if there exists a move scheduled earlier whose source or target node
     * overlaps with the current move's target node. */
    shardMoveDependencyInfo = hash_search(
        shardMoveDependencies.nodeDependencies, &move->targetNode->nodeId, HASH_ENTER,
        &found);

    if (found)
    {
        hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
    }

    *nDepends = hash_get_num_entries(dependsList);

    int64 *dependsArray = NULL;

    if (*nDepends > 0)
    {
        HASH_SEQ_STATUS seq;

        dependsArray = palloc((*nDepends) * sizeof(int64));

        hash_seq_init(&seq, dependsList);
        int i = 0;
        int64 *dependsTaskId;

        while ((dependsTaskId = (int64 *) hash_seq_search(&seq)) != NULL)
        {
            dependsArray[i++] = *dependsTaskId;
        }
    }

    return dependsArray;
}


/*
 * UpdateShardMoveDependencies function updates the dependency maps with the latest move's taskId.
 */
static void
UpdateShardMoveDependencies(PlacementUpdateEvent *move, uint64 colocationId, int64 taskId,
                            ShardMoveDependencies shardMoveDependencies)
{
    ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search(
        shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER, NULL);
    shardMoveDependencyInfo->taskId = taskId;

    shardMoveDependencyInfo = hash_search(shardMoveDependencies.nodeDependencies,
                                          &move->sourceNode->nodeId, HASH_ENTER, NULL);

    shardMoveDependencyInfo->taskId = taskId;

    shardMoveDependencyInfo = hash_search(shardMoveDependencies.nodeDependencies,
                                          &move->targetNode->nodeId, HASH_ENTER, NULL);

    shardMoveDependencyInfo->taskId = taskId;
}
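Taken together, the three helpers above form a small scheduling API. The sketch below shows how a caller could wire them up for a single move, mirroring the loop in RebalanceTableShardsBackground further down; it is illustrative only, the wrapper name is invented, and taskId is assumed to come from ScheduleBackgroundTask().

static void
TrackDependenciesForOneMove(PlacementUpdateEvent *move,
                            ShardMoveDependencies shardMoveDependencies, int64 taskId)
{
    int nDepends = 0;
    int64 colocationId = GetColocationId(move);

    /* latest task in the same colocation group plus the latest tasks touching either node */
    int64 *dependsArray = GenerateTaskMoveDependencyList(move, colocationId,
                                                         shardMoveDependencies, &nDepends);
    (void) dependsArray;    /* would be handed to ScheduleBackgroundTask() */

    /* record this task as the newest one for its colocation group and both nodes */
    UpdateShardMoveDependencies(move, colocationId, taskId, shardMoveDependencies);
}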
/*
 * RebalanceTableShardsBackground rebalances the shards for the relations
 * inside the relationIdList across the different workers. It does so using our
@@ -1894,12 +2079,6 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
        EnsureTableOwner(colocatedTableId);
    }

    if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
    {
        /* make sure that all tables included in the rebalance have a replica identity*/
        VerifyTablesHaveReplicaIdentity(colocatedTableList);
    }

    List *placementUpdateList = GetRebalanceSteps(options);

    if (list_length(placementUpdateList) == 0)

@@ -1908,6 +2087,23 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
        return 0;
    }

    if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
    {
        /*
         * If the shard transfer mode is set to auto, we should check beforehand
         * if we are able to use logical replication to transfer shards or not.
         * We throw an error if any of the tables do not have a replica identity, which
         * is required for logical replication to replicate UPDATE and DELETE commands.
         */
        PlacementUpdateEvent *placementUpdate = NULL;
        foreach_ptr(placementUpdate, placementUpdateList)
        {
            relationId = RelationIdForShard(placementUpdate->shardId);
            List *colocatedTables = ColocatedTableList(relationId);
            VerifyTablesHaveReplicaIdentity(colocatedTables);
        }
    }

    DropOrphanedResourcesInSeparateTransaction();

    /* find the name of the shard transfer mode to interpolate in the scheduled command */
@@ -1922,18 +2118,8 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
    StringInfoData buf = { 0 };
    initStringInfo(&buf);

    /*
     * Currently we only have two tasks that any move can depend on:
     *  - replicating reference tables
     *  - the previous move
     *
     * prevJobIdx tells what slot to write the id of the task into. We only use both slots
     * if we are actually replicating reference tables.
     */
    int64 prevJobId[2] = { 0 };
    int prevJobIdx = 0;

    List *referenceTableIdList = NIL;
    int64 replicateRefTablesTaskId = 0;

    if (HasNodesWithMissingReferenceTables(&referenceTableIdList))
    {

@@ -1949,15 +2135,15 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
        appendStringInfo(&buf,
                         "SELECT pg_catalog.replicate_reference_tables(%s)",
                         quote_literal_cstr(shardTranferModeLabel));
        BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
                                                      prevJobIdx, prevJobId);
        prevJobId[prevJobIdx] = task->taskid;
        prevJobIdx++;
        BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, 0,
                                                      NULL);
        replicateRefTablesTaskId = task->taskid;
    }

    PlacementUpdateEvent *move = NULL;
    bool first = true;
    int prevMoveIndex = prevJobIdx;

    ShardMoveDependencies shardMoveDependencies = InitializeShardMoveDependencies();

    foreach_ptr(move, placementUpdateList)
    {
        resetStringInfo(&buf);

@@ -1969,14 +2155,27 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
                         move->targetNode->nodeId,
                         quote_literal_cstr(shardTranferModeLabel));

        BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
                                                      prevJobIdx, prevJobId);
        prevJobId[prevMoveIndex] = task->taskid;
        if (first)
        int64 colocationId = GetColocationId(move);

        int nDepends = 0;

        int64 *dependsArray = GenerateTaskMoveDependencyList(move, colocationId,
                                                             shardMoveDependencies,
                                                             &nDepends);

        if (nDepends == 0 && replicateRefTablesTaskId > 0)
        {
            first = false;
            prevJobIdx++;
            nDepends = 1;
            dependsArray = palloc(nDepends * sizeof(int64));
            dependsArray[0] = replicateRefTablesTaskId;
        }

        BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
                                                      nDepends,
                                                      dependsArray);

        UpdateShardMoveDependencies(move, colocationId, task->taskid,
                                    shardMoveDependencies);
    }

    ereport(NOTICE,
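To make the resulting task graph concrete, here is one possible ordering produced by the scheduling loop above; the task ids, colocation groups, and nodes are illustrative only, and it is assumed that reference tables had to be replicated first.

/*
 * task 1: replicate_reference_tables()                 depends on: -
 * task 2: move colocation group A, node 1 -> node 2    depends on: 1 (no other deps,
 *                                                       falls back to the ref table task)
 * task 3: move colocation group A, node 3 -> node 4    depends on: 2 (same colocation group)
 * task 4: move colocation group B, node 2 -> node 5    depends on: 2 (shares node 2)
 * task 5: move colocation group C, node 6 -> node 7    depends on: 1 (no deps, falls back)
 */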
@@ -70,22 +70,43 @@ typedef struct ShardCommandList
    List *ddlCommandList;
} ShardCommandList;

static const char *ShardTransferTypeNames[] = {
    [SHARD_TRANSFER_INVALID_FIRST] = "unknown",
    [SHARD_TRANSFER_MOVE] = "move",
    [SHARD_TRANSFER_COPY] = "copy",
};

static const char *ShardTransferTypeNamesCapitalized[] = {
    [SHARD_TRANSFER_INVALID_FIRST] = "unknown",
    [SHARD_TRANSFER_MOVE] = "Move",
    [SHARD_TRANSFER_COPY] = "Copy",
};

static const char *ShardTransferTypeNamesContinuous[] = {
    [SHARD_TRANSFER_INVALID_FIRST] = "unknown",
    [SHARD_TRANSFER_MOVE] = "Moving",
    [SHARD_TRANSFER_COPY] = "Copying",
};

static const char *ShardTransferTypeFunctionNames[] = {
    [SHARD_TRANSFER_INVALID_FIRST] = "unknown",
    [SHARD_TRANSFER_MOVE] = "citus_move_shard_placement",
    [SHARD_TRANSFER_COPY] = "citus_copy_shard_placement",
};
/* local function forward declarations */
|
||||
static bool CanUseLogicalReplication(Oid relationId, char shardReplicationMode);
|
||||
static void ErrorIfTableCannotBeReplicated(Oid relationId);
|
||||
static void ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName,
|
||||
int targetNodePort);
|
||||
static void ErrorIfTargetNodeIsNotSafeForTransfer(const char *targetNodeName,
|
||||
int targetNodePort,
|
||||
ShardTransferType transferType);
|
||||
static void ErrorIfSameNode(char *sourceNodeName, int sourceNodePort,
|
||||
char *targetNodeName, int targetNodePort,
|
||||
const char *operationName);
|
||||
static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
|
||||
int32 sourceNodePort, char *targetNodeName,
|
||||
int32 targetNodePort,
|
||||
char shardReplicationMode);
|
||||
static void CopyShardTables(List *shardIntervalList, char *sourceNodeName,
|
||||
int32 sourceNodePort, char *targetNodeName,
|
||||
int32 targetNodePort, bool useLogicalReplication,
|
||||
char *operationName);
|
||||
const char *operationName);
|
||||
static void CopyShardTablesViaLogicalReplication(List *shardIntervalList,
|
||||
char *sourceNodeName,
|
||||
int32 sourceNodePort,
|
||||
|
@ -100,7 +121,7 @@ static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName,
|
|||
int32 targetNodePort);
|
||||
static List * RecreateTableDDLCommandList(Oid relationId);
|
||||
static void EnsureTableListOwner(List *tableIdList);
|
||||
static void EnsureTableListSuitableForReplication(List *tableIdList);
|
||||
static void ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList);
|
||||
|
||||
static void DropShardPlacementsFromMetadata(List *shardList,
|
||||
char *nodeName,
|
||||
|
@ -112,12 +133,28 @@ static void UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId,
|
|||
int32 targetNodePort);
|
||||
static bool IsShardListOnNode(List *colocatedShardList, char *targetNodeName,
|
||||
uint32 targetPort);
|
||||
static void SetupRebalanceMonitorForShardTransfer(uint64 shardId, Oid distributedTableId,
|
||||
char *sourceNodeName,
|
||||
uint32 sourceNodePort,
|
||||
char *targetNodeName,
|
||||
uint32 targetNodePort,
|
||||
ShardTransferType transferType);
|
||||
static void CheckSpaceConstraints(MultiConnection *connection,
|
||||
uint64 colocationSizeInBytes);
|
||||
static void EnsureAllShardsCanBeCopied(List *colocatedShardList,
|
||||
char *sourceNodeName, uint32 sourceNodePort,
|
||||
char *targetNodeName, uint32 targetNodePort);
|
||||
static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
|
||||
char *sourceNodeName, uint32 sourceNodePort,
|
||||
char *targetNodeName, uint32
|
||||
targetNodePort);
|
||||
char *targetNodeName, uint32 targetNodePort,
|
||||
ShardTransferType transferType);
|
||||
static bool TransferAlreadyCompleted(List *colocatedShardList,
|
||||
char *sourceNodeName, uint32 sourceNodePort,
|
||||
char *targetNodeName, uint32 targetNodePort,
|
||||
ShardTransferType transferType);
|
||||
static void LockColocatedRelationsForMove(List *colocatedTableList);
|
||||
static void ErrorIfForeignTableForShardTransfer(List *colocatedTableList,
|
||||
ShardTransferType transferType);
|
||||
static List * RecreateShardDDLCommandList(ShardInterval *shardInterval,
|
||||
const char *sourceNodeName,
|
||||
int32 sourceNodePort);
|
||||
|
@ -163,9 +200,9 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS)
|
|||
|
||||
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
|
||||
|
||||
ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
|
||||
TransferShards(shardId, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort,
|
||||
shardReplicationMode);
|
||||
shardReplicationMode, SHARD_TRANSFER_COPY);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
@ -192,10 +229,9 @@ citus_copy_shard_placement_with_nodeid(PG_FUNCTION_ARGS)
|
|||
|
||||
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
|
||||
|
||||
ReplicateColocatedShardPlacement(shardId,
|
||||
sourceNode->workerName, sourceNode->workerPort,
|
||||
TransferShards(shardId, sourceNode->workerName, sourceNode->workerPort,
|
||||
targetNode->workerName, targetNode->workerPort,
|
||||
shardReplicationMode);
|
||||
shardReplicationMode, SHARD_TRANSFER_COPY);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
@ -228,9 +264,9 @@ master_copy_shard_placement(PG_FUNCTION_ARGS)
|
|||
ereport(WARNING, (errmsg("do_repair argument is deprecated")));
|
||||
}
|
||||
|
||||
ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
|
||||
TransferShards(shardId, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort,
|
||||
shardReplicationMode);
|
||||
shardReplicationMode, SHARD_TRANSFER_COPY);
|
||||
|
||||
|
||||
PG_RETURN_VOID();
|
||||
|
@ -264,9 +300,10 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
|
|||
int32 targetNodePort = PG_GETARG_INT32(4);
|
||||
Oid shardReplicationModeOid = PG_GETARG_OID(5);
|
||||
|
||||
citus_move_shard_placement_internal(shardId, sourceNodeName, sourceNodePort,
|
||||
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
|
||||
TransferShards(shardId, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort,
|
||||
shardReplicationModeOid);
|
||||
shardReplicationMode, SHARD_TRANSFER_MOVE);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
@ -291,126 +328,111 @@ citus_move_shard_placement_with_nodeid(PG_FUNCTION_ARGS)
|
|||
WorkerNode *sourceNode = FindNodeWithNodeId(sourceNodeId, missingOk);
|
||||
WorkerNode *targetNode = FindNodeWithNodeId(targetNodeId, missingOk);
|
||||
|
||||
citus_move_shard_placement_internal(shardId, sourceNode->workerName,
|
||||
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
|
||||
TransferShards(shardId, sourceNode->workerName,
|
||||
sourceNode->workerPort, targetNode->workerName,
|
||||
targetNode->workerPort,
|
||||
shardReplicationModeOid);
|
||||
targetNode->workerPort, shardReplicationMode, SHARD_TRANSFER_MOVE);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* citus_move_shard_placement_internal is the internal function for shard moves.
|
||||
* TransferShards is the function for shard transfers.
|
||||
*/
|
||||
void
|
||||
citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
|
||||
TransferShards(int64 shardId, char *sourceNodeName,
|
||||
int32 sourceNodePort, char *targetNodeName,
|
||||
int32 targetNodePort, Oid shardReplicationModeOid)
|
||||
int32 targetNodePort, char shardReplicationMode,
|
||||
ShardTransferType transferType)
|
||||
{
|
||||
ListCell *colocatedTableCell = NULL;
|
||||
ListCell *colocatedShardCell = NULL;
|
||||
/* strings to be used in log messages */
|
||||
const char *operationName = ShardTransferTypeNames[transferType];
|
||||
const char *operationNameCapitalized =
|
||||
ShardTransferTypeNamesCapitalized[transferType];
|
||||
const char *operationFunctionName = ShardTransferTypeFunctionNames[transferType];
|
||||
|
||||
/* cannot transfer shard to the same node */
|
||||
ErrorIfSameNode(sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort,
|
||||
"move");
|
||||
|
||||
Oid relationId = RelationIdForShard(shardId);
|
||||
ErrorIfMoveUnsupportedTableType(relationId);
|
||||
ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort);
|
||||
|
||||
AcquirePlacementColocationLock(relationId, ExclusiveLock, "move");
|
||||
operationName);
|
||||
|
||||
ShardInterval *shardInterval = LoadShardInterval(shardId);
|
||||
Oid distributedTableId = shardInterval->relationId;
|
||||
|
||||
/* error if unsupported shard transfer */
|
||||
if (transferType == SHARD_TRANSFER_MOVE)
|
||||
{
|
||||
ErrorIfMoveUnsupportedTableType(distributedTableId);
|
||||
}
|
||||
else if (transferType == SHARD_TRANSFER_COPY)
|
||||
{
|
||||
ErrorIfTableCannotBeReplicated(distributedTableId);
|
||||
EnsureNoModificationsHaveBeenDone();
|
||||
}
|
||||
|
||||
ErrorIfTargetNodeIsNotSafeForTransfer(targetNodeName, targetNodePort, transferType);
|
||||
|
||||
AcquirePlacementColocationLock(distributedTableId, ExclusiveLock, operationName);
|
||||
|
||||
List *colocatedTableList = ColocatedTableList(distributedTableId);
|
||||
List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
|
||||
|
||||
foreach(colocatedTableCell, colocatedTableList)
|
||||
EnsureTableListOwner(colocatedTableList);
|
||||
|
||||
if (transferType == SHARD_TRANSFER_MOVE)
|
||||
{
|
||||
Oid colocatedTableId = lfirst_oid(colocatedTableCell);
|
||||
|
||||
/* check that user has owner rights in all co-located tables */
|
||||
EnsureTableOwner(colocatedTableId);
|
||||
|
||||
/*
|
||||
* Block concurrent DDL / TRUNCATE commands on the relation. Similarly,
|
||||
* block concurrent citus_move_shard_placement() on any shard of
|
||||
* the same relation. This is OK for now since we're executing shard
|
||||
* moves sequentially anyway.
|
||||
*/
|
||||
LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock);
|
||||
LockColocatedRelationsForMove(colocatedTableList);
|
||||
}
|
||||
|
||||
if (IsForeignTable(relationId))
|
||||
ErrorIfForeignTableForShardTransfer(colocatedTableList, transferType);
|
||||
|
||||
if (transferType == SHARD_TRANSFER_COPY)
|
||||
{
|
||||
char *relationName = get_rel_name(colocatedTableId);
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot move shard"),
|
||||
errdetail("Table %s is a foreign table. Moving "
|
||||
"shards backed by foreign tables is "
|
||||
"not supported.", relationName)));
|
||||
ErrorIfReplicatingDistributedTableWithFKeys(colocatedTableList);
|
||||
}
|
||||
}
|
||||
|
||||
/* we sort colocatedShardList so that lock operations will not cause any deadlocks */
|
||||
colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
|
||||
|
||||
/*
|
||||
* If there are no active placements on the source and only active placements on
|
||||
* the target node, we assume the copy to already be done.
|
||||
* We sort shardIntervalList so that lock operations will not cause any
|
||||
* deadlocks.
|
||||
*/
|
||||
if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
|
||||
!IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
|
||||
colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
|
||||
|
||||
if (TransferAlreadyCompleted(colocatedShardList,
|
||||
sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort,
|
||||
transferType))
|
||||
{
|
||||
/* if the transfer is already completed, we can return right away */
|
||||
ereport(WARNING, (errmsg("shard is already present on node %s:%d",
|
||||
targetNodeName, targetNodePort),
|
||||
errdetail("Move may have already completed.")));
|
||||
errdetail("%s may have already completed.",
|
||||
operationNameCapitalized)));
|
||||
return;
|
||||
}
|
||||
|
||||
foreach(colocatedShardCell, colocatedShardList)
|
||||
{
|
||||
ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell);
|
||||
uint64 colocatedShardId = colocatedShard->shardId;
|
||||
|
||||
EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
|
||||
EnsureAllShardsCanBeCopied(colocatedShardList, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort);
|
||||
}
|
||||
|
||||
char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
|
||||
if (shardReplicationMode == TRANSFER_MODE_AUTOMATIC)
|
||||
{
|
||||
VerifyTablesHaveReplicaIdentity(colocatedTableList);
|
||||
}
|
||||
|
||||
EnsureEnoughDiskSpaceForShardMove(colocatedShardList, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort);
|
||||
EnsureEnoughDiskSpaceForShardMove(colocatedShardList,
|
||||
sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort, transferType);
|
||||
|
||||
|
||||
/*
|
||||
* We want to be able to track progress of shard moves using
|
||||
* get_rebalancer_progress. If this move is initiated by the rebalancer,
|
||||
* then the rebalancer call has already set up the shared memory that is
|
||||
* used to do that. But if citus_move_shard_placement is called directly by
|
||||
* the user (or through any other mechanism), then the shared memory is not
|
||||
* set up yet. In that case we do it here.
|
||||
*/
|
||||
if (!IsRebalancerInternalBackend())
|
||||
{
|
||||
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
|
||||
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
|
||||
|
||||
PlacementUpdateEvent *placementUpdateEvent = palloc0(
|
||||
sizeof(PlacementUpdateEvent));
|
||||
placementUpdateEvent->updateType = PLACEMENT_UPDATE_MOVE;
|
||||
placementUpdateEvent->shardId = shardId;
|
||||
placementUpdateEvent->sourceNode = sourceNode;
|
||||
placementUpdateEvent->targetNode = targetNode;
|
||||
SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId,
|
||||
REBALANCE_PROGRESS_MOVING,
|
||||
PLACEMENT_UPDATE_STATUS_SETTING_UP);
|
||||
}
|
||||
SetupRebalanceMonitorForShardTransfer(shardId, distributedTableId,
|
||||
sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort,
|
||||
transferType);
|
||||
|
||||
UpdatePlacementUpdateStatusForShardIntervalList(
|
||||
colocatedShardList,
|
||||
|
@ -428,7 +450,7 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
|
|||
{
|
||||
BlockWritesToShardList(colocatedShardList);
|
||||
}
|
||||
else
|
||||
else if (transferType == SHARD_TRANSFER_MOVE)
|
||||
{
|
||||
/*
|
||||
* We prevent multiple shard moves in a transaction that use logical
|
||||
|
@ -452,6 +474,20 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
|
|||
PlacementMovedUsingLogicalReplicationInTX = true;
|
||||
}
|
||||
|
||||
if (transferType == SHARD_TRANSFER_COPY &&
|
||||
!IsCitusTableType(distributedTableId, REFERENCE_TABLE))
|
||||
{
|
||||
/*
|
||||
* When copying a shard to a new node, we should first ensure that reference
|
||||
* tables are present such that joins work immediately after copying the shard.
|
||||
* When copying a reference table, we are probably trying to achieve just that.
|
||||
*
|
||||
* Since this a long-running operation we do this after the error checks, but
|
||||
* before taking metadata locks.
|
||||
*/
|
||||
EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode);
|
||||
}
|
||||
|
||||
DropOrphanedResourcesInSeparateTransaction();
|
||||
|
||||
ShardInterval *colocatedShard = NULL;
|
||||
|
@ -466,18 +502,21 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
|
|||
ErrorIfCleanupRecordForShardExists(qualifiedShardName);
|
||||
}
|
||||
|
||||
/*
|
||||
* CopyColocatedShardPlacement function copies given shard with its co-located
|
||||
* shards.
|
||||
*/
|
||||
CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName,
|
||||
targetNodePort, useLogicalReplication, "citus_move_shard_placement");
|
||||
targetNodePort, useLogicalReplication, operationFunctionName);
|
||||
|
||||
if (transferType == SHARD_TRANSFER_MOVE)
|
||||
{
|
||||
/* delete old shards metadata and mark the shards as to be deferred drop */
|
||||
int32 sourceGroupId = GroupForNode(sourceNodeName, sourceNodePort);
|
||||
InsertCleanupRecordsForShardPlacementsOnNode(colocatedShardList,
|
||||
sourceGroupId);
|
||||
}
|
||||
|
||||
/*
|
||||
* Finally insert the placements to pg_dist_placement and sync it to the
|
||||
* metadata workers.
|
||||
*/
|
||||
colocatedShard = NULL;
|
||||
foreach_ptr(colocatedShard, colocatedShardList)
|
||||
{
|
||||
|
@ -488,17 +527,30 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
|
|||
InsertShardPlacementRow(colocatedShardId, placementId,
|
||||
ShardLength(colocatedShardId),
|
||||
groupId);
|
||||
|
||||
if (transferType == SHARD_TRANSFER_COPY &&
|
||||
ShouldSyncTableMetadata(colocatedShard->relationId))
|
||||
{
|
||||
char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId,
|
||||
0, groupId);
|
||||
|
||||
SendCommandToWorkersWithMetadata(placementCommand);
|
||||
}
|
||||
}
|
||||
|
||||
if (transferType == SHARD_TRANSFER_MOVE)
|
||||
{
|
||||
/*
|
||||
* Since this is move operation, we remove the placements from the metadata
|
||||
* for the source node after copy.
|
||||
*/
|
||||
DropShardPlacementsFromMetadata(colocatedShardList, sourceNodeName, sourceNodePort);
|
||||
DropShardPlacementsFromMetadata(colocatedShardList,
|
||||
sourceNodeName, sourceNodePort);
|
||||
|
||||
UpdateColocatedShardPlacementMetadataOnWorkers(shardId, sourceNodeName,
|
||||
sourceNodePort, targetNodeName,
|
||||
targetNodePort);
|
||||
}
|
||||
|
||||
UpdatePlacementUpdateStatusForShardIntervalList(
|
||||
colocatedShardList,
|
||||
|
@ -611,6 +663,70 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetN
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* LockColocatedRelationsForMove takes a list of relations, locks all of them
|
||||
* using ShareUpdateExclusiveLock
|
||||
*/
|
||||
static void
|
||||
LockColocatedRelationsForMove(List *colocatedTableList)
|
||||
{
|
||||
Oid colocatedTableId = InvalidOid;
|
||||
foreach_oid(colocatedTableId, colocatedTableList)
|
||||
{
|
||||
LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfForeignTableForShardTransfer takes a list of relations, errors out if
|
||||
* there's a foreign table in the list.
|
||||
*/
|
||||
static void
|
||||
ErrorIfForeignTableForShardTransfer(List *colocatedTableList,
|
||||
ShardTransferType transferType)
|
||||
{
|
||||
Oid colocatedTableId = InvalidOid;
|
||||
foreach_oid(colocatedTableId, colocatedTableList)
|
||||
{
|
||||
if (IsForeignTable(colocatedTableId))
|
||||
{
|
||||
char *relationName = get_rel_name(colocatedTableId);
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot %s shard",
|
||||
ShardTransferTypeNames[transferType]),
|
||||
errdetail("Table %s is a foreign table. "
|
||||
"%s shards backed by foreign tables is "
|
||||
"not supported.", relationName,
|
||||
ShardTransferTypeNamesContinuous[transferType])));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EnsureAllShardsCanBeCopied is a wrapper around EnsureShardCanBeCopied.
|
||||
*/
|
||||
static void
|
||||
EnsureAllShardsCanBeCopied(List *colocatedShardList,
|
||||
char *sourceNodeName, uint32 sourceNodePort,
|
||||
char *targetNodeName, uint32 targetNodePort)
|
||||
{
|
||||
ShardInterval *colocatedShard = NULL;
|
||||
foreach_ptr(colocatedShard, colocatedShardList)
|
||||
{
|
||||
uint64 colocatedShardId = colocatedShard->shardId;
|
||||
|
||||
/*
|
||||
* To transfer shard, there should be healthy placement in source node and no
|
||||
* placement in the target node.
|
||||
*/
|
||||
EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EnsureEnoughDiskSpaceForShardMove checks that there is enough space for
|
||||
* shard moves of the given colocated shard list from source node to target node.
|
||||
|
@@ -619,9 +735,10 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetN
static void
EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
                                  char *sourceNodeName, uint32 sourceNodePort,
                                  char *targetNodeName, uint32 targetNodePort)
                                  char *targetNodeName, uint32 targetNodePort,
                                  ShardTransferType transferType)
{
    if (!CheckAvailableSpaceBeforeMove)
    if (!CheckAvailableSpaceBeforeMove || transferType != SHARD_TRANSFER_MOVE)
    {
        return;
    }

@@ -636,6 +753,34 @@ EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
}


/*
 * TransferAlreadyCompleted returns true if the given shard transfer is already done.
 * Returns false otherwise.
 */
static bool
TransferAlreadyCompleted(List *colocatedShardList,
                         char *sourceNodeName, uint32 sourceNodePort,
                         char *targetNodeName, uint32 targetNodePort,
                         ShardTransferType transferType)
{
    if (transferType == SHARD_TRANSFER_MOVE &&
        IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
        !IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
    {
        return true;
    }

    if (transferType == SHARD_TRANSFER_COPY &&
        IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
        IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
    {
        return true;
    }

    return false;
}
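As a worked example of the completion rules above (the shard id and node names are illustrative, not taken from the patch):

/*
 * move shard 102008 from worker-a to worker-b: the transfer counts as already
 *     completed only when the colocated placements exist on worker-b and are
 *     gone from worker-a.
 * copy shard 102008 from worker-a to worker-b: the transfer counts as already
 *     completed when the colocated placements exist on both worker-a and worker-b.
 */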
/*
|
||||
* ShardListSizeInBytes returns the size in bytes of a set of shard tables.
|
||||
*/
|
||||
|
@ -682,6 +827,49 @@ ShardListSizeInBytes(List *shardList, char *workerNodeName, uint32
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* SetupRebalanceMonitorForShardTransfer prepares the parameters and
|
||||
* calls SetupRebalanceMonitor, unless the current transfer is a move
|
||||
* initiated by the rebalancer.
|
||||
* See comments on SetupRebalanceMonitor
|
||||
*/
|
||||
static void
|
||||
SetupRebalanceMonitorForShardTransfer(uint64 shardId, Oid distributedTableId,
|
||||
char *sourceNodeName, uint32 sourceNodePort,
|
||||
char *targetNodeName, uint32 targetNodePort,
|
||||
ShardTransferType transferType)
|
||||
{
|
||||
if (transferType == SHARD_TRANSFER_MOVE && IsRebalancerInternalBackend())
|
||||
{
|
||||
/*
|
||||
* We want to be able to track progress of shard moves using
|
||||
* get_rebalancer_progress. If this move is initiated by the rebalancer,
|
||||
* then the rebalancer call has already set up the shared memory that is
|
||||
* used to do that, so we should return here.
|
||||
* But if citus_move_shard_placement is called directly by the user
|
||||
* (or through any other mechanism), then the shared memory is not
|
||||
* set up yet. In that case we do it here.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
|
||||
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
|
||||
|
||||
PlacementUpdateEvent *placementUpdateEvent = palloc0(
|
||||
sizeof(PlacementUpdateEvent));
|
||||
placementUpdateEvent->updateType =
|
||||
transferType == SHARD_TRANSFER_COPY ? PLACEMENT_UPDATE_COPY :
|
||||
PLACEMENT_UPDATE_MOVE;
|
||||
placementUpdateEvent->shardId = shardId;
|
||||
placementUpdateEvent->sourceNode = sourceNode;
|
||||
placementUpdateEvent->targetNode = targetNode;
|
||||
SetupRebalanceMonitor(list_make1(placementUpdateEvent), distributedTableId,
|
||||
REBALANCE_PROGRESS_MOVING,
|
||||
PLACEMENT_UPDATE_STATUS_SETTING_UP);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CheckSpaceConstraints checks there is enough space to place the colocation
|
||||
* on the node that the connection is connected to.
|
||||
|
@ -729,17 +917,19 @@ CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes)
|
|||
|
||||
|
||||
/*
|
||||
* ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not
|
||||
* eligible for moving shards.
|
||||
* ErrorIfTargetNodeIsNotSafeForTransfer throws error if the target node is not
|
||||
* eligible for shard transfers.
|
||||
*/
|
||||
void
|
||||
ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
|
||||
static void
|
||||
ErrorIfTargetNodeIsNotSafeForTransfer(const char *targetNodeName, int targetNodePort,
|
||||
ShardTransferType transferType)
|
||||
{
|
||||
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
|
||||
if (workerNode == NULL)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Moving shards to a non-existing node is not supported"),
|
||||
errmsg("%s shards to a non-existing node is not supported",
|
||||
ShardTransferTypeNamesContinuous[transferType]),
|
||||
errhint(
|
||||
"Add the target node via SELECT citus_add_node('%s', %d);",
|
||||
targetNodeName, targetNodePort)));
|
||||
|
@ -748,13 +938,14 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
|
|||
if (!workerNode->isActive)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Moving shards to a non-active node is not supported"),
|
||||
errmsg("%s shards to a non-active node is not supported",
|
||||
ShardTransferTypeNamesContinuous[transferType]),
|
||||
errhint(
|
||||
"Activate the target node via SELECT citus_activate_node('%s', %d);",
|
||||
targetNodeName, targetNodePort)));
|
||||
}
|
||||
|
||||
if (!workerNode->shouldHaveShards)
|
||||
if (transferType == SHARD_TRANSFER_MOVE && !workerNode->shouldHaveShards)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Moving shards to a node that shouldn't have a shard is "
|
||||
|
@ -767,8 +958,9 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
|
|||
if (!NodeIsPrimary(workerNode))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Moving shards to a secondary (e.g., replica) node is "
|
||||
"not supported")));
|
||||
errmsg("%s shards to a secondary (e.g., replica) node is "
|
||||
"not supported",
|
||||
ShardTransferTypeNamesContinuous[transferType])));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1046,41 +1238,6 @@ ErrorIfTableCannotBeReplicated(Oid relationId)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfTargetNodeIsNotSafeToCopyTo throws an error if the target node is not
|
||||
* eligible for copying shards.
|
||||
*/
|
||||
static void
|
||||
ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName, int targetNodePort)
|
||||
{
|
||||
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
|
||||
if (workerNode == NULL)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Copying shards to a non-existing node is not supported"),
|
||||
errhint(
|
||||
"Add the target node via SELECT citus_add_node('%s', %d);",
|
||||
targetNodeName, targetNodePort)));
|
||||
}
|
||||
|
||||
if (!workerNode->isActive)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Copying shards to a non-active node is not supported"),
|
||||
errhint(
|
||||
"Activate the target node via SELECT citus_activate_node('%s', %d);",
|
||||
targetNodeName, targetNodePort)));
|
||||
}
|
||||
|
||||
if (!NodeIsPrimary(workerNode))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Copying shards to a secondary (e.g., replica) node is "
|
||||
"not supported")));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* LookupShardTransferMode maps the oids of citus.shard_transfer_mode enum
|
||||
* values to a char.
|
||||
|
@ -1114,154 +1271,6 @@ LookupShardTransferMode(Oid shardReplicationModeOid)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReplicateColocatedShardPlacement replicates the given shard and its
|
||||
* colocated shards from a source node to target node.
|
||||
*/
|
||||
static void
|
||||
ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
|
||||
int32 sourceNodePort, char *targetNodeName,
|
||||
int32 targetNodePort, char shardReplicationMode)
|
||||
{
|
||||
ShardInterval *shardInterval = LoadShardInterval(shardId);
|
||||
Oid distributedTableId = shardInterval->relationId;
|
||||
|
||||
ErrorIfSameNode(sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort,
|
||||
"copy");
|
||||
|
||||
ErrorIfTableCannotBeReplicated(shardInterval->relationId);
|
||||
ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort);
|
||||
EnsureNoModificationsHaveBeenDone();
|
||||
|
||||
AcquirePlacementColocationLock(shardInterval->relationId, ExclusiveLock, "copy");
|
||||
|
||||
List *colocatedTableList = ColocatedTableList(distributedTableId);
|
||||
List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
|
||||
|
||||
EnsureTableListOwner(colocatedTableList);
|
||||
EnsureTableListSuitableForReplication(colocatedTableList);
|
||||
|
||||
/*
|
||||
* We sort shardIntervalList so that lock operations will not cause any
|
||||
* deadlocks.
|
||||
*/
|
||||
colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
|
||||
|
||||
/*
|
||||
* If there are active placements on both nodes, we assume the copy to already
|
||||
* be done.
|
||||
*/
|
||||
if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
|
||||
IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
|
||||
{
|
||||
ereport(WARNING, (errmsg("shard is already present on node %s:%d",
|
||||
targetNodeName, targetNodePort),
|
||||
errdetail("Copy may have already completed.")));
|
||||
return;
|
||||
}
|
||||
|
||||
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
|
||||
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
|
||||
|
||||
Oid relationId = RelationIdForShard(shardId);
|
||||
PlacementUpdateEvent *placementUpdateEvent = palloc0(
|
||||
sizeof(PlacementUpdateEvent));
|
||||
placementUpdateEvent->updateType = PLACEMENT_UPDATE_COPY;
|
||||
placementUpdateEvent->shardId = shardId;
|
||||
placementUpdateEvent->sourceNode = sourceNode;
|
||||
placementUpdateEvent->targetNode = targetNode;
|
||||
SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId,
|
||||
REBALANCE_PROGRESS_MOVING,
|
||||
PLACEMENT_UPDATE_STATUS_SETTING_UP);
|
||||
|
||||
UpdatePlacementUpdateStatusForShardIntervalList(
|
||||
colocatedShardList,
|
||||
sourceNodeName,
|
||||
sourceNodePort,
|
||||
PLACEMENT_UPDATE_STATUS_SETTING_UP);
|
||||
|
||||
/*
|
||||
* At this point of the shard replication, we don't need to block the writes to
|
||||
* shards when logical replication is used.
|
||||
*/
|
||||
bool useLogicalReplication = CanUseLogicalReplication(distributedTableId,
|
||||
shardReplicationMode);
|
||||
if (!useLogicalReplication)
|
||||
{
|
||||
BlockWritesToShardList(colocatedShardList);
|
||||
}
|
||||
|
||||
ShardInterval *colocatedShard = NULL;
|
||||
foreach_ptr(colocatedShard, colocatedShardList)
|
||||
{
|
||||
uint64 colocatedShardId = colocatedShard->shardId;
|
||||
|
||||
/*
|
||||
* For shard copy, there should be healthy placement in source node and no
|
||||
* placement in the target node.
|
||||
*/
|
||||
EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort);
|
||||
}
|
||||
|
||||
if (shardReplicationMode == TRANSFER_MODE_AUTOMATIC)
|
||||
{
|
||||
VerifyTablesHaveReplicaIdentity(colocatedTableList);
|
||||
}
|
||||
|
||||
if (!IsCitusTableType(distributedTableId, REFERENCE_TABLE))
|
||||
{
|
||||
/*
|
||||
* When copying a shard to a new node, we should first ensure that reference
|
||||
* tables are present such that joins work immediately after copying the shard.
|
||||
* When copying a reference table, we are probably trying to achieve just that.
|
||||
*
|
||||
* Since this a long-running operation we do this after the error checks, but
|
||||
* before taking metadata locks.
|
||||
*/
|
||||
EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode);
|
||||
}
|
||||
|
||||
DropOrphanedResourcesInSeparateTransaction();
|
||||
|
||||
CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort,
|
||||
targetNodeName, targetNodePort, useLogicalReplication,
|
||||
"citus_copy_shard_placement");
|
||||
|
||||
/*
|
||||
* Finally insert the placements to pg_dist_placement and sync it to the
|
||||
* metadata workers.
|
||||
*/
|
||||
foreach_ptr(colocatedShard, colocatedShardList)
|
||||
{
|
||||
uint64 colocatedShardId = colocatedShard->shardId;
|
||||
uint32 groupId = GroupForNode(targetNodeName, targetNodePort);
|
||||
uint64 placementId = GetNextPlacementId();
|
||||
|
||||
InsertShardPlacementRow(colocatedShardId, placementId,
|
||||
ShardLength(colocatedShardId),
|
||||
groupId);
|
||||
|
||||
if (ShouldSyncTableMetadata(colocatedShard->relationId))
|
||||
{
|
||||
char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId,
|
||||
0, groupId);
|
||||
|
||||
SendCommandToWorkersWithMetadata(placementCommand);
|
||||
}
|
||||
}
|
||||
|
||||
UpdatePlacementUpdateStatusForShardIntervalList(
|
||||
colocatedShardList,
|
||||
sourceNodeName,
|
||||
sourceNodePort,
|
||||
PLACEMENT_UPDATE_STATUS_COMPLETED);
|
||||
|
||||
FinalizeCurrentProgressMonitor();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EnsureTableListOwner ensures current user owns given tables. Superusers
|
||||
* are regarded as owners.
|
||||
|
@ -1278,25 +1287,15 @@ EnsureTableListOwner(List *tableIdList)
|
|||
|
||||
|
||||
/*
|
||||
* EnsureTableListSuitableForReplication errors out if given tables are not
|
||||
* ErrorIfReplicatingDistributedTableWithFKeys errors out if given tables are not
|
||||
* suitable for replication.
|
||||
*/
|
||||
static void
|
||||
EnsureTableListSuitableForReplication(List *tableIdList)
|
||||
ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList)
|
||||
{
|
||||
Oid tableId = InvalidOid;
|
||||
foreach_oid(tableId, tableIdList)
|
||||
{
|
||||
if (IsForeignTable(tableId))
|
||||
{
|
||||
char *relationName = get_rel_name(tableId);
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot replicate shard"),
|
||||
errdetail("Table %s is a foreign table. Replicating "
|
||||
"shards backed by foreign tables is "
|
||||
"not supported.", relationName)));
|
||||
}
|
||||
|
||||
List *foreignConstraintCommandList =
|
||||
GetReferencingForeignConstaintCommands(tableId);
|
||||
|
||||
|
@ -1318,7 +1317,7 @@ EnsureTableListSuitableForReplication(List *tableIdList)
|
|||
static void
|
||||
CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort,
|
||||
char *targetNodeName, int32 targetNodePort, bool useLogicalReplication,
|
||||
char *operationName)
|
||||
const char *operationName)
|
||||
{
|
||||
if (list_length(shardIntervalList) < 1)
|
||||
{
|
||||
|
|
|
@@ -53,8 +53,14 @@ worker_copy_table_to_node(PG_FUNCTION_ARGS)
                                          targetNodeId);

    StringInfo selectShardQueryForCopy = makeStringInfo();

    /*
     * Even though we do COPY(SELECT ...) all the columns, we can't just do SELECT * because we need to not COPY generated colums.
     */
    const char *columnList = CopyableColumnNamesFromRelationName(relationSchemaName,
                                                                 relationName);
    appendStringInfo(selectShardQueryForCopy,
                     "SELECT * FROM %s;", relationQualifiedName);
                     "SELECT %s FROM %s;", columnList, relationQualifiedName);

    ParamListInfo params = NULL;
    ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params,
@ -24,6 +24,7 @@
|
|||
#include "distributed/relation_utils.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "distributed/local_executor.h"
|
||||
#include "distributed/replication_origin_session_utils.h"
|
||||
|
||||
/*
|
||||
* LocalCopyBuffer is used in copy callback to return the copied rows.
|
||||
|
@ -73,13 +74,14 @@ static void ShardCopyDestReceiverDestroy(DestReceiver *destReceiver);
|
|||
static bool CanUseLocalCopy(uint32_t destinationNodeId);
|
||||
static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName,
|
||||
bool
|
||||
useBinaryFormat);
|
||||
useBinaryFormat, TupleDesc tupleDesc);
|
||||
static void WriteLocalTuple(TupleTableSlot *slot, ShardCopyDestReceiver *copyDest);
|
||||
static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead);
|
||||
static void LocalCopyToShard(ShardCopyDestReceiver *copyDest, CopyOutState
|
||||
localCopyOutState);
|
||||
static void ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest);
|
||||
|
||||
|
||||
static bool
|
||||
CanUseLocalCopy(uint32_t destinationNodeId)
|
||||
{
|
||||
|
@ -103,9 +105,16 @@ ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest)
|
|||
NULL /* database (current) */);
|
||||
ClaimConnectionExclusively(copyDest->connection);
|
||||
|
||||
|
||||
RemoteTransactionBeginIfNecessary(copyDest->connection);
|
||||
|
||||
SetupReplicationOriginRemoteSession(copyDest->connection);
|
||||
|
||||
|
||||
StringInfo copyStatement = ConstructShardCopyStatement(
|
||||
copyDest->destinationShardFullyQualifiedName,
|
||||
copyDest->copyOutState->binary);
|
||||
copyDest->copyOutState->binary,
|
||||
copyDest->tupleDescriptor);
|
||||
|
||||
if (!SendRemoteCommand(copyDest->connection, copyStatement->data))
|
||||
{
|
||||
|
@ -184,6 +193,8 @@ ShardCopyDestReceiverReceive(TupleTableSlot *slot, DestReceiver *dest)
|
|||
CopyOutState copyOutState = copyDest->copyOutState;
|
||||
if (copyDest->useLocalCopy)
|
||||
{
|
||||
/* Setup replication origin session for local copy*/
|
||||
|
||||
WriteLocalTuple(slot, copyDest);
|
||||
if (copyOutState->fe_msgbuf->len > LocalCopyFlushThresholdByte)
|
||||
{
|
||||
|
@ -259,6 +270,11 @@ ShardCopyDestReceiverStartup(DestReceiver *dest, int operation, TupleDesc
|
|||
copyDest->columnOutputFunctions = ColumnOutputFunctions(inputTupleDescriptor,
|
||||
copyOutState->binary);
|
||||
copyDest->copyOutState = copyOutState;
|
||||
if (copyDest->useLocalCopy)
|
||||
{
|
||||
/* Setup replication origin session for local copy*/
|
||||
SetupReplicationOriginLocalSession();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -317,6 +333,9 @@ ShardCopyDestReceiverShutdown(DestReceiver *dest)
|
|||
|
||||
PQclear(result);
|
||||
ForgetResults(copyDest->connection);
|
||||
|
||||
ResetReplicationOriginRemoteSession(copyDest->connection);
|
||||
|
||||
CloseConnection(copyDest->connection);
|
||||
}
|
||||
}
|
||||
|
@ -329,6 +348,10 @@ static void
|
|||
ShardCopyDestReceiverDestroy(DestReceiver *dest)
|
||||
{
|
||||
ShardCopyDestReceiver *copyDest = (ShardCopyDestReceiver *) dest;
|
||||
if (copyDest->useLocalCopy)
|
||||
{
|
||||
ResetReplicationOriginLocalSession();
|
||||
}
|
||||
|
||||
if (copyDest->copyOutState)
|
||||
{
|
||||
|
@ -344,21 +367,80 @@ ShardCopyDestReceiverDestroy(DestReceiver *dest)
|
|||
}
|
||||
|
||||
|
||||
/*
 * CopyableColumnNamesFromTupleDesc creates and returns a comma-separated string
 * of column names to be used in the COPY and SELECT statements issued when
 * copying a table. Both statements must filter out GENERATED columns, since
 * COPY cannot handle them. While iterating over the table's attributes we also
 * need to skip dropped columns.
 */
const char *
CopyableColumnNamesFromTupleDesc(TupleDesc tupDesc)
{
	StringInfo columnList = makeStringInfo();
	bool firstInList = true;

	for (int i = 0; i < tupDesc->natts; i++)
	{
		Form_pg_attribute att = TupleDescAttr(tupDesc, i);
		if (att->attgenerated || att->attisdropped)
		{
			continue;
		}
		if (!firstInList)
		{
			appendStringInfo(columnList, ",");
		}

		firstInList = false;

		appendStringInfo(columnList, "%s", quote_identifier(NameStr(att->attname)));
	}

	return columnList->data;
}
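As a rough illustration of why the explicit column list matters, consider a hypothetical table with a generated column; COPY ... FROM rejects generated columns in its column list, so the shard copy names only the copyable columns. This is a sketch with made-up names, not the exact statements Citus emits:

CREATE TABLE items (
    id bigint,
    price numeric,
    price_with_tax numeric GENERATED ALWAYS AS (price * 1.18) STORED
);

COPY items (id, price, price_with_tax) FROM STDIN;  -- fails: generated columns cannot be used in COPY
COPY items (id, price) FROM STDIN;                   -- works: generated (and dropped) columns are left out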
/*
 * CopyableColumnNamesFromRelationName function is a wrapper for CopyableColumnNamesFromTupleDesc.
 */
const char *
CopyableColumnNamesFromRelationName(const char *schemaName, const char *relationName)
{
	Oid namespaceOid = get_namespace_oid(schemaName, true);

	Oid relationId = get_relname_relid(relationName, namespaceOid);

	Relation relation = relation_open(relationId, AccessShareLock);

	TupleDesc tupleDesc = RelationGetDescr(relation);

	const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc);

	relation_close(relation, NoLock);

	return columnList;
}
/*
 * ConstructShardCopyStatement constructs the text of a COPY statement
 * for copying into a result table
 */
static StringInfo
ConstructShardCopyStatement(List *destinationShardFullyQualifiedName, bool
							useBinaryFormat)
							useBinaryFormat,
							TupleDesc tupleDesc)
{
	char *destinationShardSchemaName = linitial(destinationShardFullyQualifiedName);
	char *destinationShardRelationName = lsecond(destinationShardFullyQualifiedName);


	StringInfo command = makeStringInfo();
	appendStringInfo(command, "COPY %s.%s FROM STDIN",

	const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc);

	appendStringInfo(command, "COPY %s.%s (%s) FROM STDIN",
					 quote_identifier(destinationShardSchemaName), quote_identifier(
						 destinationShardRelationName));
						 destinationShardRelationName), columnList);

	if (useBinaryFormat)
	{
@@ -110,8 +110,13 @@ worker_split_copy(PG_FUNCTION_ARGS)
												   splitCopyInfoList))));

	StringInfo selectShardQueryForCopy = makeStringInfo();
	const char *columnList = CopyableColumnNamesFromRelationName(
		sourceShardToCopySchemaName,
		sourceShardToCopyName);

	appendStringInfo(selectShardQueryForCopy,
					 "SELECT * FROM %s;", sourceShardToCopyQualifiedName);
					 "SELECT %s FROM %s;", columnList,
					 sourceShardToCopyQualifiedName);
|
||||
|
||||
ParamListInfo params = NULL;
|
||||
ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params,
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "distributed/intermediate_results.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/merge_planner.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/multi_executor.h"
|
||||
#include "distributed/distributed_planner.h"
|
||||
|
@ -68,6 +69,17 @@
|
|||
#include "utils/syscache.h"
|
||||
|
||||
|
||||
/* RouterPlanType is used to determine the router plan to invoke */
typedef enum RouterPlanType
{
	INSERT_SELECT_INTO_CITUS_TABLE,
	INSERT_SELECT_INTO_LOCAL_TABLE,
	DML_QUERY,
	SELECT_QUERY,
	MERGE_QUERY,
	REPLAN_WITH_BOUND_PARAMETERS
} RouterPlanType;
|
||||
|
||||
static List *plannerRestrictionContextList = NIL;
|
||||
int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */
|
||||
static uint64 NextPlanId = 1;
|
||||
|
@ -75,12 +87,8 @@ static uint64 NextPlanId = 1;
|
|||
/* keep track of planner call stack levels */
|
||||
int PlannerLevel = 0;
|
||||
|
||||
static void ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree,
|
||||
List *rangeTableList);
|
||||
static bool ContainsMergeCommandWalker(Node *node);
|
||||
static bool ListContainsDistributedTableRTE(List *rangeTableList,
|
||||
bool *maybeHasForeignDistributedTable);
|
||||
static bool IsUpdateOrDelete(Query *query);
|
||||
static PlannedStmt * CreateDistributedPlannedStmt(
|
||||
DistributedPlanningContext *planContext);
|
||||
static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId,
|
||||
|
@ -132,7 +140,10 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext
|
|||
static RTEListProperties * GetRTEListProperties(List *rangeTableList);
|
||||
static List * TranslatedVars(PlannerInfo *root, int relationIndex);
|
||||
static void WarnIfListHasForeignDistributedTable(List *rangeTableList);
|
||||
static void ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList);
|
||||
static RouterPlanType GetRouterPlanType(Query *query,
|
||||
Query *originalQuery,
|
||||
bool hasUnresolvedParams);
|
||||
|
||||
|
||||
/* Distributed planner hook */
|
||||
PlannedStmt *
|
||||
|
@ -156,7 +167,7 @@ distributed_planner(Query *parse,
|
|||
* We cannot have merge command for this path as well because
|
||||
* there cannot be recursively planned merge command.
|
||||
*/
|
||||
Assert(!ContainsMergeCommandWalker((Node *) parse));
|
||||
Assert(!IsMergeQuery(parse));
|
||||
|
||||
needsDistributedPlanning = true;
|
||||
}
|
||||
|
@ -200,12 +211,6 @@ distributed_planner(Query *parse,
|
|||
|
||||
if (!fastPathRouterQuery)
|
||||
{
|
||||
/*
|
||||
* Fast path queries cannot have merge command, and we
|
||||
* prevent the remaining here.
|
||||
*/
|
||||
ErrorIfQueryHasUnsupportedMergeCommand(parse, rangeTableList);
|
||||
|
||||
/*
|
||||
* When there are partitioned tables (not applicable to fast path),
|
||||
* pretend that they are regular tables to avoid unnecessary work
|
||||
|
@ -304,72 +309,6 @@ distributed_planner(Query *parse,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfQueryHasUnsupportedMergeCommand walks over the query tree and bails out
|
||||
* if there is no Merge command (e.g., CMD_MERGE) in the query tree. For merge,
|
||||
* looks for all supported combinations, throws an exception if any violations
|
||||
* are seen.
|
||||
*/
|
||||
static void
|
||||
ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, List *rangeTableList)
|
||||
{
|
||||
/*
|
||||
* Postgres currently doesn't support Merge queries inside subqueries and
|
||||
* ctes, but lets be defensive and do query tree walk anyway.
|
||||
*
|
||||
* We do not call this path for fast-path queries to avoid this additional
|
||||
* overhead.
|
||||
*/
|
||||
if (!ContainsMergeCommandWalker((Node *) queryTree))
|
||||
{
|
||||
/* No MERGE found */
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* In Citus we have limited support for MERGE, it's allowed
|
||||
* only if all the tables(target, source or any CTE) tables
|
||||
* are are local i.e. a combination of Citus local and Non-Citus
|
||||
* tables (regular Postgres tables).
|
||||
*/
|
||||
ErrorIfMergeHasUnsupportedTables(queryTree, rangeTableList);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ContainsMergeCommandWalker walks over the node and finds if there are any
|
||||
* Merge command (e.g., CMD_MERGE) in the node.
|
||||
*/
|
||||
static bool
|
||||
ContainsMergeCommandWalker(Node *node)
|
||||
{
|
||||
#if PG_VERSION_NUM < PG_VERSION_15
|
||||
return false;
|
||||
#endif
|
||||
|
||||
if (node == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (IsA(node, Query))
|
||||
{
|
||||
Query *query = (Query *) node;
|
||||
if (IsMergeQuery(query))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return query_tree_walker((Query *) node, ContainsMergeCommandWalker, NULL, 0);
|
||||
}
|
||||
|
||||
return expression_tree_walker(node, ContainsMergeCommandWalker, NULL);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ExtractRangeTableEntryList is a wrapper around ExtractRangeTableEntryWalker.
|
||||
* The function traverses the input query and returns all the range table
|
||||
|
@ -669,17 +608,6 @@ IsMultiTaskPlan(DistributedPlan *distributedPlan)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsUpdateOrDelete returns true if the query performs an update or delete.
|
||||
*/
|
||||
bool
|
||||
IsUpdateOrDelete(Query *query)
|
||||
{
|
||||
return query->commandType == CMD_UPDATE ||
|
||||
query->commandType == CMD_DELETE;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PlanFastPathDistributedStmt creates a distributed planned statement using
|
||||
* the FastPathPlanner.
|
||||
|
@ -850,7 +778,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
|
|||
* if it is planned as a multi shard modify query.
|
||||
*/
|
||||
if ((distributedPlan->planningError ||
|
||||
(IsUpdateOrDelete(planContext->originalQuery) && IsMultiTaskPlan(
|
||||
(UpdateOrDeleteOrMergeQuery(planContext->originalQuery) && IsMultiTaskPlan(
|
||||
distributedPlan))) &&
|
||||
hasUnresolvedParams)
|
||||
{
|
||||
|
@ -955,6 +883,51 @@ TryCreateDistributedPlannedStmt(PlannedStmt *localPlan,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* GetRouterPlanType checks the parse tree to return appropriate plan type.
|
||||
*/
|
||||
static RouterPlanType
|
||||
GetRouterPlanType(Query *query, Query *originalQuery, bool hasUnresolvedParams)
|
||||
{
|
||||
if (!IsModifyCommand(originalQuery))
|
||||
{
|
||||
return SELECT_QUERY;
|
||||
}
|
||||
|
||||
Oid targetRelationId = ModifyQueryResultRelationId(query);
|
||||
|
||||
EnsureModificationsCanRunOnRelation(targetRelationId);
|
||||
EnsurePartitionTableNotReplicated(targetRelationId);
|
||||
|
||||
/* Check the type of modification being done */
|
||||
|
||||
if (InsertSelectIntoCitusTable(originalQuery))
|
||||
{
|
||||
if (hasUnresolvedParams)
|
||||
{
|
||||
return REPLAN_WITH_BOUND_PARAMETERS;
|
||||
}
|
||||
return INSERT_SELECT_INTO_CITUS_TABLE;
|
||||
}
|
||||
else if (InsertSelectIntoLocalTable(originalQuery))
|
||||
{
|
||||
if (hasUnresolvedParams)
|
||||
{
|
||||
return REPLAN_WITH_BOUND_PARAMETERS;
|
||||
}
|
||||
return INSERT_SELECT_INTO_LOCAL_TABLE;
|
||||
}
|
||||
else if (IsMergeQuery(originalQuery))
|
||||
{
|
||||
return MERGE_QUERY;
|
||||
}
|
||||
else
|
||||
{
|
||||
return DML_QUERY;
|
||||
}
|
||||
}
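For illustration, here is how a few statement shapes would map to RouterPlanType under the checks above; the tables are hypothetical and the mapping is inferred from this function rather than quoted from documentation:

INSERT INTO dist_target SELECT * FROM dist_source;     -- INSERT_SELECT_INTO_CITUS_TABLE
INSERT INTO plain_local SELECT * FROM dist_source;     -- INSERT_SELECT_INTO_LOCAL_TABLE
UPDATE dist_target SET val = val + 1 WHERE id = 42;    -- DML_QUERY
MERGE INTO dist_target t USING dist_source s ON (t.id = s.id)
    WHEN MATCHED THEN UPDATE SET val = s.val;          -- MERGE_QUERY
SELECT count(*) FROM dist_target;                      -- SELECT_QUERY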
|
||||
|
||||
|
||||
/*
|
||||
* CreateDistributedPlan generates a distributed plan for a query.
|
||||
* It goes through 3 steps:
|
||||
|
@ -972,51 +945,71 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
|
|||
DistributedPlan *distributedPlan = NULL;
|
||||
bool hasCtes = originalQuery->cteList != NIL;
|
||||
|
||||
if (IsModifyCommand(originalQuery))
|
||||
/* Step 1: Try router planner */
|
||||
|
||||
RouterPlanType routerPlan = GetRouterPlanType(query, originalQuery,
|
||||
hasUnresolvedParams);
|
||||
|
||||
switch (routerPlan)
|
||||
{
|
||||
Oid targetRelationId = ModifyQueryResultRelationId(query);
|
||||
|
||||
EnsureModificationsCanRunOnRelation(targetRelationId);
|
||||
|
||||
EnsurePartitionTableNotReplicated(targetRelationId);
|
||||
|
||||
if (InsertSelectIntoCitusTable(originalQuery))
|
||||
case INSERT_SELECT_INTO_CITUS_TABLE:
|
||||
{
|
||||
if (hasUnresolvedParams)
|
||||
{
|
||||
/*
|
||||
* Unresolved parameters can cause performance regressions in
|
||||
* INSERT...SELECT when the partition column is a parameter
|
||||
* because we don't perform any additional pruning in the executor.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
|
||||
distributedPlan =
|
||||
CreateInsertSelectPlan(planId, originalQuery, plannerRestrictionContext,
|
||||
CreateInsertSelectPlan(planId,
|
||||
originalQuery,
|
||||
plannerRestrictionContext,
|
||||
boundParams);
|
||||
break;
|
||||
}
|
||||
else if (InsertSelectIntoLocalTable(originalQuery))
|
||||
|
||||
case INSERT_SELECT_INTO_LOCAL_TABLE:
|
||||
{
|
||||
if (hasUnresolvedParams)
|
||||
{
|
||||
/*
|
||||
* Unresolved parameters can cause performance regressions in
|
||||
* INSERT...SELECT when the partition column is a parameter
|
||||
* because we don't perform any additional pruning in the executor.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
distributedPlan =
|
||||
CreateInsertSelectIntoLocalTablePlan(planId, originalQuery, boundParams,
|
||||
CreateInsertSelectIntoLocalTablePlan(planId,
|
||||
originalQuery,
|
||||
boundParams,
|
||||
hasUnresolvedParams,
|
||||
plannerRestrictionContext);
|
||||
break;
|
||||
}
|
||||
else
|
||||
|
||||
case DML_QUERY:
|
||||
{
|
||||
/* modifications are always routed through the same planner/executor */
|
||||
distributedPlan =
|
||||
CreateModifyPlan(originalQuery, query, plannerRestrictionContext);
|
||||
break;
|
||||
}
|
||||
|
||||
case MERGE_QUERY:
|
||||
{
|
||||
distributedPlan =
|
||||
CreateMergePlan(originalQuery, query, plannerRestrictionContext);
|
||||
break;
|
||||
}
|
||||
|
||||
case REPLAN_WITH_BOUND_PARAMETERS:
|
||||
{
|
||||
/*
|
||||
* Unresolved parameters can cause performance regressions in
|
||||
* INSERT...SELECT when the partition column is a parameter
|
||||
* because we don't perform any additional pruning in the executor.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
|
||||
case SELECT_QUERY:
|
||||
{
|
||||
			/*
			 * For SELECT queries we first try to plan the query as a router
			 * query, if the router executor is enabled. If that is not
			 * supported, we fall back to the full-blown plan/optimize/physical
			 * planning process needed to produce distributed query plans.
			 */
|
||||
distributedPlan =
|
||||
CreateRouterPlan(originalQuery, query, plannerRestrictionContext);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* the functions above always return a plan, possibly with an error */
|
||||
|
@ -1030,31 +1023,6 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
|
|||
{
|
||||
RaiseDeferredError(distributedPlan->planningError, DEBUG2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* For select queries we, if router executor is enabled, first try to
|
||||
* plan the query as a router query. If not supported, otherwise try
|
||||
* the full blown plan/optimize/physical planning process needed to
|
||||
* produce distributed query plans.
|
||||
*/
|
||||
|
||||
distributedPlan = CreateRouterPlan(originalQuery, query,
|
||||
plannerRestrictionContext);
|
||||
if (distributedPlan->planningError == NULL)
|
||||
{
|
||||
return distributedPlan;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* For debugging it's useful to display why query was not
|
||||
* router plannable.
|
||||
*/
|
||||
RaiseDeferredError(distributedPlan->planningError, DEBUG2);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasUnresolvedParams)
|
||||
{
|
||||
|
@ -1082,6 +1050,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
|
|||
boundParams);
|
||||
Assert(originalQuery != NULL);
|
||||
|
||||
/* Step 2: Generate subplans for CTEs and complex subqueries */
|
||||
|
||||
/*
|
||||
* Plan subqueries and CTEs that cannot be pushed down by recursively
|
||||
* calling the planner and return the resulting plans to subPlanList.
|
||||
|
@ -1182,6 +1152,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
|
|||
query->cteList = NIL;
|
||||
Assert(originalQuery->cteList == NIL);
|
||||
|
||||
/* Step 3: Try Logical planner */
|
||||
|
||||
MultiTreeRoot *logicalPlan = MultiLogicalPlanCreate(originalQuery, query,
|
||||
plannerRestrictionContext);
|
||||
MultiLogicalPlanOptimize(logicalPlan);
|
||||
|
@ -2611,148 +2583,3 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is
|
||||
* permitted on special relations, such as materialized view, returns true only if
|
||||
* it's a "source" relation.
|
||||
*/
|
||||
bool
|
||||
IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte)
|
||||
{
|
||||
if (!IsMergeQuery(parse))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable);
|
||||
|
||||
/* Is it a target relation? */
|
||||
if (targetRte->relid == rte->relid)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE
|
||||
* present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus
|
||||
* tables (regular Postgres tables), raises an exception for all other combinations.
|
||||
*/
|
||||
static void
|
||||
ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList)
|
||||
{
|
||||
ListCell *tableCell = NULL;
|
||||
|
||||
foreach(tableCell, rangeTableList)
|
||||
{
|
||||
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(tableCell);
|
||||
Oid relationId = rangeTableEntry->relid;
|
||||
|
||||
switch (rangeTableEntry->rtekind)
|
||||
{
|
||||
case RTE_RELATION:
|
||||
{
|
||||
/* Check the relation type */
|
||||
break;
|
||||
}
|
||||
|
||||
case RTE_SUBQUERY:
|
||||
case RTE_FUNCTION:
|
||||
case RTE_TABLEFUNC:
|
||||
case RTE_VALUES:
|
||||
case RTE_JOIN:
|
||||
case RTE_CTE:
|
||||
{
|
||||
/* Skip them as base table(s) will be checked */
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations,
|
||||
* such as, trigger data; until we find a genuine use case, raise an
|
||||
* exception.
|
||||
* RTE_RESULT is a node added by the planner and we shouldn't
|
||||
* encounter it in the parse tree.
|
||||
*/
|
||||
case RTE_NAMEDTUPLESTORE:
|
||||
case RTE_RESULT:
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("MERGE command is not supported with "
|
||||
"Tuplestores and results")));
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("MERGE command: Unrecognized range table entry.")));
|
||||
}
|
||||
}
|
||||
|
||||
/* RTE Relation can be of various types, check them now */
|
||||
|
||||
/* skip the regular views as they are replaced with subqueries */
|
||||
if (rangeTableEntry->relkind == RELKIND_VIEW)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rangeTableEntry->relkind == RELKIND_MATVIEW ||
|
||||
rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE)
|
||||
{
|
||||
/* Materialized view or Foreign table as target is not allowed */
|
||||
if (IsMergeAllowedOnRelation(parse, rangeTableEntry))
|
||||
{
|
||||
/* Non target relation is ok */
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("MERGE command is not allowed "
|
||||
"on materialized view")));
|
||||
}
|
||||
}
|
||||
|
||||
if (rangeTableEntry->relkind != RELKIND_RELATION &&
|
||||
rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("Unexpected relation type(relkind:%c) in MERGE command",
|
||||
rangeTableEntry->relkind)));
|
||||
}
|
||||
|
||||
Assert(rangeTableEntry->relid != 0);
|
||||
|
||||
/* Distributed tables and Reference tables are not supported yet */
|
||||
if (IsCitusTableType(relationId, REFERENCE_TABLE) ||
|
||||
IsCitusTableType(relationId, DISTRIBUTED_TABLE))
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("MERGE command is not supported on "
|
||||
"distributed/reference tables yet")));
|
||||
}
|
||||
|
||||
/* Regular Postgres tables and Citus local tables are allowed */
|
||||
if (!IsCitusTable(relationId) ||
|
||||
IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/* Any other Citus table type missing ? */
|
||||
}
|
||||
|
||||
/* All the tables are local, supported */
|
||||
}
|
||||
|
|
|
@ -54,10 +54,11 @@
|
|||
bool EnableFastPathRouterPlanner = true;
|
||||
|
||||
static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey);
|
||||
static bool ConjunctionContainsColumnFilter(Node *node, Var *column,
|
||||
Node **distributionKeyValue);
|
||||
static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn,
|
||||
Node **distributionKeyValue);
|
||||
static bool ConjunctionContainsColumnFilter(Node *node,
|
||||
Var *column,
|
||||
Node **distributionKeyValue);
|
||||
|
||||
|
||||
/*
|
||||
|
|
|
@ -875,7 +875,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery,
|
|||
&prunedShardIntervalListList,
|
||||
replacePrunedQueryWithDummy,
|
||||
&multiShardModifyQuery, NULL,
|
||||
false);
|
||||
NULL);
|
||||
|
||||
Assert(!multiShardModifyQuery);
|
||||
|
||||
|
@ -938,6 +938,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery,
|
|||
modifyTask->taskPlacementList = insertShardPlacementList;
|
||||
modifyTask->relationShardList = relationShardList;
|
||||
modifyTask->replicationModel = targetTableCacheEntry->replicationModel;
|
||||
modifyTask->isLocalTableModification = false;
|
||||
|
||||
return modifyTask;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,738 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* merge_planner.c
|
||||
*
|
||||
* This file contains functions to help plan MERGE queries.
|
||||
*
|
||||
* Copyright (c) Citus Data, Inc.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "postgres.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "optimizer/optimizer.h"
|
||||
#include "parser/parsetree.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
#include "distributed/citus_clauses.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/merge_planner.h"
|
||||
#include "distributed/multi_logical_optimizer.h"
|
||||
#include "distributed/multi_router_planner.h"
|
||||
#include "distributed/pg_version_constants.h"
|
||||
#include "distributed/query_pushdown_planning.h"
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_15
|
||||
|
||||
static DeferredErrorMessage * CheckIfRTETypeIsUnsupported(Query *parse,
|
||||
RangeTblEntry *rangeTableEntry);
|
||||
static DeferredErrorMessage * ErrorIfDistTablesNotColocated(Query *parse,
|
||||
List *
|
||||
distTablesList,
|
||||
PlannerRestrictionContext
|
||||
*
|
||||
plannerRestrictionContext);
|
||||
static DeferredErrorMessage * ErrorIfMergeHasUnsupportedTables(Query *parse,
|
||||
List *rangeTableList,
|
||||
PlannerRestrictionContext *
|
||||
restrictionContext);
|
||||
static bool IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool
|
||||
skipOuterVars);
|
||||
static DeferredErrorMessage * InsertDistributionColumnMatchesSource(Query *query,
|
||||
RangeTblEntry *
|
||||
resultRte);
|
||||
|
||||
static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(Oid
|
||||
resultRelationId,
|
||||
FromExpr *joinTree,
|
||||
Node *quals,
|
||||
List *targetList,
|
||||
CmdType commandType);
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* CreateMergePlan attempts to create a plan for the given MERGE SQL
|
||||
* statement. If planning fails ->planningError is set to a description
|
||||
* of the failure.
|
||||
*/
|
||||
DistributedPlan *
|
||||
CreateMergePlan(Query *originalQuery, Query *query,
|
||||
PlannerRestrictionContext *plannerRestrictionContext)
|
||||
{
|
||||
DistributedPlan *distributedPlan = CitusMakeNode(DistributedPlan);
|
||||
bool multiShardQuery = false;
|
||||
|
||||
Assert(originalQuery->commandType == CMD_MERGE);
|
||||
|
||||
distributedPlan->modLevel = RowModifyLevelForQuery(query);
|
||||
|
||||
distributedPlan->planningError = MergeQuerySupported(originalQuery,
|
||||
multiShardQuery,
|
||||
plannerRestrictionContext);
|
||||
|
||||
if (distributedPlan->planningError != NULL)
|
||||
{
|
||||
return distributedPlan;
|
||||
}
|
||||
|
||||
Job *job = RouterJob(originalQuery, plannerRestrictionContext,
|
||||
&distributedPlan->planningError);
|
||||
|
||||
if (distributedPlan->planningError != NULL)
|
||||
{
|
||||
return distributedPlan;
|
||||
}
|
||||
|
||||
ereport(DEBUG1, (errmsg("Creating MERGE router plan")));
|
||||
|
||||
distributedPlan->workerJob = job;
|
||||
distributedPlan->combineQuery = NULL;
|
||||
|
||||
/* MERGE doesn't support RETURNING clause */
|
||||
distributedPlan->expectResults = false;
|
||||
distributedPlan->targetRelationId = ResultRelationOidForQuery(query);
|
||||
|
||||
distributedPlan->fastPathRouterPlan =
|
||||
plannerRestrictionContext->fastPathRestrictionContext->fastPathRouterQuery;
|
||||
|
||||
return distributedPlan;
|
||||
}
|
||||
|
||||
|
||||
/*
 * MergeQuerySupported checks for a MERGE command in the query and, if it finds
 * one, verifies the criteria below:
 * - Supported tables and table combinations, in ErrorIfMergeHasUnsupportedTables
 * - Requirements on distributed tables, in ErrorIfDistTablesNotColocated
 * - Target lists and functions in quals, in MergeQualAndTargetListFunctionsSupported
 */
|
||||
DeferredErrorMessage *
|
||||
MergeQuerySupported(Query *originalQuery, bool multiShardQuery,
|
||||
PlannerRestrictionContext *plannerRestrictionContext)
|
||||
{
|
||||
/* function is void for pre-15 versions of Postgres */
|
||||
#if PG_VERSION_NUM < PG_VERSION_15
|
||||
|
||||
return NULL;
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* TODO: For now, we are adding an exception where any volatile or stable
|
||||
* functions are not allowed in the MERGE query, but this will become too
|
||||
* restrictive as this will prevent many useful and simple cases, such as,
|
||||
* INSERT VALUES(ts::timestamp), bigserial column inserts etc. But without
|
||||
* this restriction, we have a potential danger of some of the function(s)
|
||||
* getting executed at the worker which will result in incorrect behavior.
|
||||
*/
|
||||
if (contain_mutable_functions((Node *) originalQuery))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"non-IMMUTABLE functions are not yet supported "
|
||||
"in MERGE sql with distributed tables ",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
List *rangeTableList = ExtractRangeTableEntryList(originalQuery);
|
||||
RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery);
|
||||
|
||||
	/*
	 * Fast path queries cannot contain a MERGE command, and we prevent the
	 * remaining cases here. In Citus we have limited support for MERGE: it is
	 * allowed only if all the tables (target, source, or any CTE) are local,
	 * i.e. a combination of Citus local and non-Citus tables (regular Postgres
	 * tables), or distributed tables with some restrictions; please see the
	 * header of routine ErrorIfDistTablesNotColocated for details.
	 */
|
||||
DeferredErrorMessage *deferredError =
|
||||
ErrorIfMergeHasUnsupportedTables(originalQuery,
|
||||
rangeTableList,
|
||||
plannerRestrictionContext);
|
||||
if (deferredError)
|
||||
{
|
||||
/* MERGE's unsupported combination, raise the exception */
|
||||
RaiseDeferredError(deferredError, ERROR);
|
||||
}
|
||||
|
||||
Oid resultRelationId = resultRte->relid;
|
||||
deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId,
|
||||
originalQuery->jointree,
|
||||
originalQuery->jointree->
|
||||
quals,
|
||||
originalQuery->targetList,
|
||||
originalQuery->commandType);
|
||||
if (deferredError)
|
||||
{
|
||||
return deferredError;
|
||||
}
|
||||
|
||||
/*
|
||||
* MERGE is a special case where we have multiple modify statements
|
||||
* within itself. Check each INSERT/UPDATE/DELETE individually.
|
||||
*/
|
||||
MergeAction *action = NULL;
|
||||
foreach_ptr(action, originalQuery->mergeActionList)
|
||||
{
|
||||
Assert(originalQuery->returningList == NULL);
|
||||
deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId,
|
||||
originalQuery->jointree,
|
||||
action->qual,
|
||||
action->targetList,
|
||||
action->commandType);
|
||||
if (deferredError)
|
||||
{
|
||||
/* MERGE's unsupported scenario, raise the exception */
|
||||
RaiseDeferredError(deferredError, ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
deferredError =
|
||||
InsertDistributionColumnMatchesSource(originalQuery, resultRte);
|
||||
if (deferredError)
|
||||
{
|
||||
/* MERGE's unsupported scenario, raise the exception */
|
||||
RaiseDeferredError(deferredError, ERROR);
|
||||
}
|
||||
|
||||
if (multiShardQuery)
|
||||
{
|
||||
deferredError =
|
||||
DeferErrorIfUnsupportedSubqueryPushdown(originalQuery,
|
||||
plannerRestrictionContext);
|
||||
if (deferredError)
|
||||
{
|
||||
return deferredError;
|
||||
}
|
||||
}
|
||||
|
||||
if (HasDangerousJoinUsing(originalQuery->rtable, (Node *) originalQuery->jointree))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"a join with USING causes an internal naming "
|
||||
"conflict, use ON instead", NULL, NULL);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is
|
||||
* permitted on special relations, such as materialized view, returns true only if
|
||||
* it's a "source" relation.
|
||||
*/
|
||||
bool
|
||||
IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte)
|
||||
{
|
||||
if (!IsMergeQuery(parse))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Fetch the MERGE target relation */
|
||||
RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable);
|
||||
|
||||
/* Is it a target relation? */
|
||||
if (targetRte->relid == rte->relid)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_15
|
||||
|
||||
/*
 * ErrorIfDistTablesNotColocated checks that
 *
 * - There are a minimum of two distributed tables (a source and a target).
 * - All the distributed tables are indeed colocated.
 *
 * If any of the conditions is not met, it returns an error.
 */
|
||||
static DeferredErrorMessage *
|
||||
ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList,
|
||||
PlannerRestrictionContext *
|
||||
plannerRestrictionContext)
|
||||
{
|
||||
/* All MERGE tables must be distributed */
|
||||
if (list_length(distTablesList) < 2)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"For MERGE command, both the source and target "
|
||||
"must be distributed", NULL, NULL);
|
||||
}
|
||||
|
||||
/* All distributed tables must be colocated */
|
||||
if (!AllDistributedRelationsInRTEListColocated(distTablesList))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"For MERGE command, all the distributed tables "
|
||||
"must be colocated", NULL, NULL);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
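A minimal sketch of a MERGE shape that satisfies this colocation requirement, assuming hypothetical tables and a PostgreSQL 15+ coordinator; whether it ultimately plans also depends on the other checks in this file:

CREATE TABLE target_events (key bigint, value int);
CREATE TABLE source_events (key bigint, value int);
SELECT create_distributed_table('target_events', 'key');
SELECT create_distributed_table('source_events', 'key', colocate_with => 'target_events');

MERGE INTO target_events t
USING source_events s ON (t.key = s.key)
WHEN MATCHED THEN UPDATE SET value = s.value
WHEN NOT MATCHED THEN INSERT (key, value) VALUES (s.key, s.value);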
|
||||
|
||||
|
||||
/*
 * CheckIfRTETypeIsUnsupported checks for types of tables that are not supported,
 * such as reference tables, append-distributed tables, and a materialized view
 * as the target relation. The routine returns NULL for the supported types and
 * an error message for everything else.
 */
|
||||
static DeferredErrorMessage *
|
||||
CheckIfRTETypeIsUnsupported(Query *parse, RangeTblEntry *rangeTableEntry)
|
||||
{
|
||||
if (rangeTableEntry->relkind == RELKIND_MATVIEW ||
|
||||
rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE)
|
||||
{
|
||||
/* Materialized view or Foreign table as target is not allowed */
|
||||
if (IsMergeAllowedOnRelation(parse, rangeTableEntry))
|
||||
{
|
||||
/* Non target relation is ok */
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Usually we don't reach this exception as the Postgres parser catches it */
|
||||
StringInfo errorMessage = makeStringInfo();
|
||||
appendStringInfo(errorMessage, "MERGE command is not allowed on "
|
||||
"relation type(relkind:%c)",
|
||||
rangeTableEntry->relkind);
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
errorMessage->data, NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
if (rangeTableEntry->relkind != RELKIND_RELATION &&
|
||||
rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE)
|
||||
{
|
||||
StringInfo errorMessage = makeStringInfo();
|
||||
appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) "
|
||||
"in MERGE command", rangeTableEntry->relkind);
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
errorMessage->data, NULL, NULL);
|
||||
}
|
||||
|
||||
Assert(rangeTableEntry->relid != 0);
|
||||
|
||||
/* Reference tables are not supported yet */
|
||||
if (IsCitusTableType(rangeTableEntry->relid, REFERENCE_TABLE))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"MERGE command is not supported on reference "
|
||||
"tables yet", NULL, NULL);
|
||||
}
|
||||
|
||||
/* Append/Range tables are not supported */
|
||||
if (IsCitusTableType(rangeTableEntry->relid, APPEND_DISTRIBUTED) ||
|
||||
IsCitusTableType(rangeTableEntry->relid, RANGE_DISTRIBUTED))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"For MERGE command, all the distributed tables "
|
||||
"must be colocated, for append/range distribution, "
|
||||
"colocation is not supported", NULL,
|
||||
"Consider using hash distribution instead");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
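For example, putting a reference table anywhere in the MERGE is rejected by this check (hypothetical tables, sketched for illustration):

CREATE TABLE countries (code text PRIMARY KEY, name text);
SELECT create_reference_table('countries');
CREATE TABLE visits (code text, total int);
SELECT create_distributed_table('visits', 'code');

MERGE INTO visits v
USING countries c ON (v.code = c.code)
WHEN NOT MATCHED THEN INSERT (code, total) VALUES (c.code, 0);
-- errors with "MERGE command is not supported on reference tables yet"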
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE
|
||||
* present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus
|
||||
* tables (regular Postgres tables), or distributed tables with some restrictions, please
|
||||
* see header of routine ErrorIfDistTablesNotColocated for details, raises an exception
|
||||
* for all other combinations.
|
||||
*/
|
||||
static DeferredErrorMessage *
|
||||
ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList,
|
||||
PlannerRestrictionContext *restrictionContext)
|
||||
{
|
||||
List *distTablesList = NIL;
|
||||
bool foundLocalTables = false;
|
||||
|
||||
RangeTblEntry *rangeTableEntry = NULL;
|
||||
foreach_ptr(rangeTableEntry, rangeTableList)
|
||||
{
|
||||
Oid relationId = rangeTableEntry->relid;
|
||||
|
||||
switch (rangeTableEntry->rtekind)
|
||||
{
|
||||
case RTE_RELATION:
|
||||
{
|
||||
/* Check the relation type */
|
||||
break;
|
||||
}
|
||||
|
||||
case RTE_SUBQUERY:
|
||||
case RTE_FUNCTION:
|
||||
case RTE_TABLEFUNC:
|
||||
case RTE_VALUES:
|
||||
case RTE_JOIN:
|
||||
case RTE_CTE:
|
||||
{
|
||||
/* Skip them as base table(s) will be checked */
|
||||
continue;
|
||||
}
|
||||
|
||||
			/*
			 * RTE_NAMEDTUPLESTORE is typically used for ephemeral named
			 * relations, such as trigger data; until we find a genuine use
			 * case, raise an exception.
			 * RTE_RESULT is a node added by the planner and we shouldn't
			 * encounter it in the parse tree.
			 */
|
||||
case RTE_NAMEDTUPLESTORE:
|
||||
case RTE_RESULT:
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"MERGE command is not supported with "
|
||||
"Tuplestores and results",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"MERGE command: Unrecognized range table entry.",
|
||||
NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/* RTE Relation can be of various types, check them now */
|
||||
|
||||
/* skip the regular views as they are replaced with subqueries */
|
||||
if (rangeTableEntry->relkind == RELKIND_VIEW)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
DeferredErrorMessage *errorMessage =
|
||||
CheckIfRTETypeIsUnsupported(parse, rangeTableEntry);
|
||||
if (errorMessage)
|
||||
{
|
||||
return errorMessage;
|
||||
}
|
||||
|
||||
/*
|
||||
* For now, save all distributed tables, later (below) we will
|
||||
* check for supported combination(s).
|
||||
*/
|
||||
if (IsCitusTableType(relationId, DISTRIBUTED_TABLE))
|
||||
{
|
||||
distTablesList = lappend(distTablesList, rangeTableEntry);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Regular Postgres tables and Citus local tables are allowed */
|
||||
if (!IsCitusTable(relationId) ||
|
||||
IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
|
||||
{
|
||||
foundLocalTables = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Any other Citus table type missing ? */
|
||||
}
|
||||
|
||||
/* Ensure all tables are indeed local */
|
||||
if (foundLocalTables && list_length(distTablesList) == 0)
|
||||
{
|
||||
/* All the tables are local, supported */
|
||||
return NULL;
|
||||
}
|
||||
else if (foundLocalTables && list_length(distTablesList) > 0)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"MERGE command is not supported with "
|
||||
"combination of distributed/local tables yet",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
/* Ensure all distributed tables are indeed co-located */
|
||||
return ErrorIfDistTablesNotColocated(parse,
|
||||
distTablesList,
|
||||
restrictionContext);
|
||||
}
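Sketching the combinations this routine distinguishes, with hypothetical tables: an all-local MERGE passes through, while mixing a local table with a distributed one is rejected:

CREATE TABLE local_target (id bigint, val int);   -- plain Postgres table
CREATE TABLE local_source (id bigint, val int);
MERGE INTO local_target t
USING local_source s ON (t.id = s.id)
WHEN MATCHED THEN UPDATE SET val = s.val;         -- allowed: all tables are local

CREATE TABLE dist_source (id bigint, val int);
SELECT create_distributed_table('dist_source', 'id');
MERGE INTO local_target t
USING dist_source s ON (t.id = s.id)
WHEN MATCHED THEN UPDATE SET val = s.val;
-- rejected: "MERGE command is not supported with combination of distributed/local tables yet"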
|
||||
|
||||
|
||||
/*
 * IsDistributionColumnInMergeSource returns true if the given column is the
 * distribution column. The function uses FindReferencedTableColumn to find the
 * original relation id and column that the column expression refers to. It then
 * checks whether that column is the distribution column of the relation.
 *
 * Also, the function always returns false for reference tables, given that
 * reference tables do not have a distribution column.
 *
 * If skipOuterVars is true, then it doesn't process the outer vars.
 */
|
||||
bool
|
||||
IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool
|
||||
skipOuterVars)
|
||||
{
|
||||
bool isDistributionColumn = false;
|
||||
Var *column = NULL;
|
||||
RangeTblEntry *relationRTE = NULL;
|
||||
|
||||
/* ParentQueryList is same as the original query for MERGE */
|
||||
FindReferencedTableColumn(columnExpression, list_make1(query), query, &column,
|
||||
&relationRTE,
|
||||
skipOuterVars);
|
||||
Oid relationId = relationRTE ? relationRTE->relid : InvalidOid;
|
||||
if (relationId != InvalidOid && column != NULL)
|
||||
{
|
||||
Var *distributionColumn = DistPartitionKey(relationId);
|
||||
|
||||
/* not all distributed tables have partition column */
|
||||
if (distributionColumn != NULL && column->varattno ==
|
||||
distributionColumn->varattno)
|
||||
{
|
||||
isDistributionColumn = true;
|
||||
}
|
||||
}
|
||||
|
||||
return isDistributionColumn;
|
||||
}
|
||||
|
||||
|
||||
/*
 * InsertDistributionColumnMatchesSource checks whether MERGE is inserting a
 * value into the target that does not come from the source table; if so, it
 * raises an exception.
 * Note: Inserting random values other than the joined column values will
 * result in unexpected behaviour of rows ending up in incorrect shards; to
 * prevent such mishaps, we disallow such inserts here.
 */
|
||||
static DeferredErrorMessage *
|
||||
InsertDistributionColumnMatchesSource(Query *query, RangeTblEntry *resultRte)
|
||||
{
|
||||
Assert(IsMergeQuery(query));
|
||||
|
||||
if (!IsCitusTableType(resultRte->relid, DISTRIBUTED_TABLE))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool foundDistributionColumn = false;
|
||||
MergeAction *action = NULL;
|
||||
foreach_ptr(action, query->mergeActionList)
|
||||
{
|
||||
/* Skip MATCHED clause as INSERTS are not allowed in it*/
|
||||
if (action->matched)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/* NOT MATCHED can have either INSERT or DO NOTHING */
|
||||
if (action->commandType == CMD_NOTHING)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (action->targetList == NIL)
|
||||
{
|
||||
/* INSERT DEFAULT VALUES is not allowed */
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot perform MERGE INSERT with DEFAULTS",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
Assert(action->commandType == CMD_INSERT);
|
||||
Var *targetKey = PartitionColumn(resultRte->relid, 1);
|
||||
|
||||
TargetEntry *targetEntry = NULL;
|
||||
foreach_ptr(targetEntry, action->targetList)
|
||||
{
|
||||
AttrNumber originalAttrNo = targetEntry->resno;
|
||||
|
||||
/* skip processing of target table non-partition columns */
|
||||
if (originalAttrNo != targetKey->varattno)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
foundDistributionColumn = true;
|
||||
|
||||
if (IsA(targetEntry->expr, Var))
|
||||
{
|
||||
if (IsDistributionColumnInMergeSource(targetEntry->expr, query, true))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"MERGE INSERT must use the source table "
|
||||
"distribution column value",
|
||||
NULL, NULL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"MERGE INSERT must refer a source column "
|
||||
"for distribution column ",
|
||||
NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundDistributionColumn)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"MERGE INSERT must have distribution column as value",
|
||||
NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
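Assuming the colocated target_events/source_events tables sketched earlier, the NOT MATCHED INSERT has to route each row by the value coming from the source's distribution column:

MERGE INTO target_events t
USING source_events s ON (t.key = s.key)
WHEN NOT MATCHED THEN INSERT (key, value) VALUES (s.key, s.value);  -- ok: key comes from the source

MERGE INTO target_events t
USING source_events s ON (t.key = s.key)
WHEN NOT MATCHED THEN INSERT (key, value) VALUES (42, s.value);
-- rejected: the distribution column value must be taken from the source table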
|
||||
|
||||
|
||||
/*
 * MergeQualAndTargetListFunctionsSupported checks WHEN/ON clause actions to see
 * which functions are allowed and whether we are updating the distribution
 * column, etc.
 */
|
||||
static DeferredErrorMessage *
|
||||
MergeQualAndTargetListFunctionsSupported(Oid resultRelationId, FromExpr *joinTree,
|
||||
Node *quals,
|
||||
List *targetList, CmdType commandType)
|
||||
{
|
||||
uint32 rangeTableId = 1;
|
||||
Var *distributionColumn = NULL;
|
||||
if (IsCitusTable(resultRelationId) && HasDistributionKey(resultRelationId))
|
||||
{
|
||||
distributionColumn = PartitionColumn(resultRelationId, rangeTableId);
|
||||
}
|
||||
|
||||
ListCell *targetEntryCell = NULL;
|
||||
bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
|
||||
bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
|
||||
foreach(targetEntryCell, targetList)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
|
||||
bool targetEntryDistributionColumn = false;
|
||||
AttrNumber targetColumnAttrNumber = InvalidAttrNumber;
|
||||
|
||||
if (distributionColumn)
|
||||
{
|
||||
if (commandType == CMD_UPDATE)
|
||||
{
|
||||
/*
|
||||
* Note that it is not possible to give an alias to
|
||||
* UPDATE table SET ...
|
||||
*/
|
||||
if (targetEntry->resname)
|
||||
{
|
||||
targetColumnAttrNumber = get_attnum(resultRelationId,
|
||||
targetEntry->resname);
|
||||
if (targetColumnAttrNumber == distributionColumn->varattno)
|
||||
{
|
||||
targetEntryDistributionColumn = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (targetEntryDistributionColumn &&
|
||||
TargetEntryChangesValue(targetEntry, distributionColumn, joinTree))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"updating the distribution column is not "
|
||||
"allowed in MERGE actions",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
|
||||
CitusIsVolatileFunction))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"functions used in MERGE actions on distributed "
|
||||
"tables must not be VOLATILE",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (MasterIrreducibleExpression((Node *) targetEntry->expr,
|
||||
&hasVarArgument, &hasBadCoalesce))
|
||||
{
|
||||
Assert(hasVarArgument || hasBadCoalesce);
|
||||
}
|
||||
|
||||
if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
|
||||
NodeIsFieldStore))
|
||||
{
|
||||
/* DELETE cannot do field indirection already */
|
||||
Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"inserting or modifying composite type fields is not "
|
||||
"supported", NULL,
|
||||
"Use the column name to insert or update the composite "
|
||||
"type as a single value");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Check the condition, convert list of expressions into expression tree for further processing
|
||||
*/
|
||||
if (quals)
|
||||
{
|
||||
if (IsA(quals, List))
|
||||
{
|
||||
quals = (Node *) make_ands_explicit((List *) quals);
|
||||
}
|
||||
|
||||
if (FindNodeMatchingCheckFunction((Node *) quals, CitusIsVolatileFunction))
|
||||
{
|
||||
StringInfo errorMessage = makeStringInfo();
|
||||
appendStringInfo(errorMessage, "functions used in the %s clause of MERGE "
|
||||
"queries on distributed tables must not be VOLATILE",
|
||||
(commandType == CMD_MERGE) ? "ON" : "WHEN");
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
errorMessage->data, NULL, NULL);
|
||||
}
|
||||
else if (MasterIrreducibleExpression(quals, &hasVarArgument, &hasBadCoalesce))
|
||||
{
|
||||
Assert(hasVarArgument || hasBadCoalesce);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasVarArgument)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"STABLE functions used in MERGE queries "
|
||||
"cannot be called with column references",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (hasBadCoalesce)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"non-IMMUTABLE functions are not allowed in CASE or "
|
||||
"COALESCE statements",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (quals != NULL && nodeTag(quals) == T_CurrentOfExpr)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot run MERGE actions with cursors",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
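As an illustration (hypothetical tables again), a volatile function in a MERGE action is rejected; note that the broader non-IMMUTABLE restriction in MergeQuerySupported above may catch it even earlier:

MERGE INTO target_events t
USING source_events s ON (t.key = s.key)
WHEN MATCHED THEN UPDATE SET value = (random() * 100)::int;
-- rejected: functions used in MERGE actions on distributed tables must not be VOLATILE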
|
||||
|
||||
|
||||
#endif
|
|
@ -29,6 +29,7 @@
|
|||
#include "distributed/citus_nodefuncs.h"
|
||||
#include "distributed/connection_management.h"
|
||||
#include "distributed/deparse_shard_query.h"
|
||||
#include "distributed/executor_util.h"
|
||||
#include "distributed/insert_select_planner.h"
|
||||
#include "distributed/insert_select_executor.h"
|
||||
#include "distributed/listutils.h"
|
||||
|
@ -199,20 +200,6 @@ CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* ALTER TABLE statements are not explained by postgres. However ALTER TABLE statements
|
||||
* may trigger SELECT statements causing explain hook to run. This situation causes a crash in a worker.
|
||||
* Therefore we will detect if we are explaining a triggered query when we are processing
|
||||
* an ALTER TABLE statement and stop explain in this situation.
|
||||
*/
|
||||
if (AlterTableInProgress())
|
||||
{
|
||||
ExplainPropertyText("Citus Explain Scan",
|
||||
"Explain for triggered constraint validation queries during ALTER TABLE commands are not supported by Citus",
|
||||
es);
|
||||
return;
|
||||
}
|
||||
|
||||
ExplainOpenGroup("Distributed Query", "Distributed Query", true, es);
|
||||
|
||||
/*
|
||||
|
|
|
@ -1383,7 +1383,7 @@ DistPartitionKey(Oid relationId)
|
|||
CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId);
|
||||
|
||||
/* non-distributed tables do not have partition column */
|
||||
if (IsCitusTableTypeCacheEntry(partitionEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKeyCacheEntry(partitionEntry))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -3385,6 +3385,13 @@ GetAggregateType(Aggref *aggregateExpression)
|
|||
{
|
||||
Oid aggFunctionId = aggregateExpression->aggfnoid;
|
||||
|
||||
/* custom aggregates with combine func take precedence over name-based logic */
|
||||
if (aggFunctionId >= FirstNormalObjectId &&
|
||||
AggregateEnabledCustom(aggregateExpression))
|
||||
{
|
||||
return AGGREGATE_CUSTOM_COMBINE;
|
||||
}
|
||||
|
||||
/* look up the function name */
|
||||
char *aggregateProcName = get_func_name(aggFunctionId);
|
||||
if (aggregateProcName == NULL)
|
||||
|
@ -3395,8 +3402,6 @@ GetAggregateType(Aggref *aggregateExpression)
|
|||
|
||||
uint32 aggregateCount = lengthof(AggregateNames);
|
||||
|
||||
Assert(AGGREGATE_INVALID_FIRST == 0);
|
||||
|
||||
for (uint32 aggregateIndex = 1; aggregateIndex < aggregateCount; aggregateIndex++)
|
||||
{
|
||||
const char *aggregateName = AggregateNames[aggregateIndex];
|
||||
|
@ -3465,7 +3470,7 @@ GetAggregateType(Aggref *aggregateExpression)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/* handle any remaining built-in aggregates with a suitable combinefn */
|
||||
if (AggregateEnabledCustom(aggregateExpression))
|
||||
{
|
||||
return AGGREGATE_CUSTOM_COMBINE;
|
||||
|
|
|
@ -228,7 +228,7 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
|||
* If the expression belongs to a non-distributed table continue searching for
|
||||
* other partition keys.
|
||||
*/
|
||||
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -2199,7 +2199,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
|
|||
Oid relationId = relationRestriction->relationId;
|
||||
|
||||
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
|
||||
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKeyCacheEntry(cacheEntry))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -2377,7 +2377,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
|
|||
nonReferenceRelations = lappend_oid(nonReferenceRelations,
|
||||
relationId);
|
||||
}
|
||||
else if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
else if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
|
||||
{
|
||||
/* do not need to handle non-distributed tables */
|
||||
continue;
|
||||
|
@ -2482,7 +2482,7 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
|
|||
ShardInterval *shardInterval = NULL;
|
||||
|
||||
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
|
||||
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKeyCacheEntry(cacheEntry))
|
||||
{
|
||||
/* non-distributed tables have only one shard */
|
||||
shardInterval = cacheEntry->sortedShardIntervalArray[0];
|
||||
|
@ -3697,7 +3697,7 @@ PartitionedOnColumn(Var *column, List *rangeTableList, List *dependentJobList)
|
|||
Var *partitionColumn = PartitionColumn(relationId, rangeTableId);
|
||||
|
||||
/* non-distributed tables do not have partition columns */
|
||||
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -4573,7 +4573,8 @@ RowModifyLevelForQuery(Query *query)
|
|||
}
|
||||
|
||||
if (commandType == CMD_UPDATE ||
|
||||
commandType == CMD_DELETE)
|
||||
commandType == CMD_DELETE ||
|
||||
commandType == CMD_MERGE)
|
||||
{
|
||||
return ROW_MODIFY_NONCOMMUTATIVE;
|
||||
}
|
||||
|
@ -5343,8 +5344,7 @@ ActiveShardPlacementLists(List *taskList)
|
|||
|
||||
|
||||
/*
|
||||
* CompareShardPlacements compares two shard placements by their tuple oid; this
|
||||
* oid reflects the tuple's insertion order into pg_dist_placement.
|
||||
* CompareShardPlacements compares two shard placements by placement id.
|
||||
*/
|
||||
int
|
||||
CompareShardPlacements(const void *leftElement, const void *rightElement)
|
||||
|
@ -5370,6 +5370,35 @@ CompareShardPlacements(const void *leftElement, const void *rightElement)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* CompareGroupShardPlacements compares two group shard placements by placement id.
|
||||
*/
|
||||
int
|
||||
CompareGroupShardPlacements(const void *leftElement, const void *rightElement)
|
||||
{
|
||||
const GroupShardPlacement *leftPlacement =
|
||||
*((const GroupShardPlacement **) leftElement);
|
||||
const GroupShardPlacement *rightPlacement =
|
||||
*((const GroupShardPlacement **) rightElement);
|
||||
|
||||
uint64 leftPlacementId = leftPlacement->placementId;
|
||||
uint64 rightPlacementId = rightPlacement->placementId;
|
||||
|
||||
if (leftPlacementId < rightPlacementId)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
else if (leftPlacementId > rightPlacementId)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* LeftRotateList returns a copy of the given list that has been cyclically
|
||||
* shifted to the left by the given rotation count. For this, the function
|
||||
|
|
|
@ -28,11 +28,13 @@
|
|||
#include "distributed/deparse_shard_query.h"
|
||||
#include "distributed/distribution_column.h"
|
||||
#include "distributed/errormessage.h"
|
||||
#include "distributed/executor_util.h"
|
||||
#include "distributed/log_utils.h"
|
||||
#include "distributed/insert_select_planner.h"
|
||||
#include "distributed/intermediate_result_pruning.h"
|
||||
#include "distributed/metadata_utility.h"
|
||||
#include "distributed/coordinator_protocol.h"
|
||||
#include "distributed/merge_planner.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/multi_executor.h"
|
||||
#include "distributed/multi_join_order.h"
|
||||
|
@ -113,6 +115,7 @@ typedef struct WalkerState
|
|||
} WalkerState;
|
||||
|
||||
bool EnableRouterExecution = true;
|
||||
bool EnableNonColocatedRouterQueryPushdown = false;
|
||||
|
||||
|
||||
/* planner functions forward declarations */
|
||||
|
@ -121,34 +124,24 @@ static void CreateSingleTaskRouterSelectPlan(DistributedPlan *distributedPlan,
|
|||
Query *query,
|
||||
PlannerRestrictionContext *
|
||||
plannerRestrictionContext);
|
||||
static Oid ResultRelationOidForQuery(Query *query);
|
||||
static bool IsTidColumn(Node *node);
|
||||
static DeferredErrorMessage * ModifyPartialQuerySupported(Query *queryTree, bool
|
||||
multiShardQuery,
|
||||
Oid *distributedTableId);
|
||||
static bool NodeIsFieldStore(Node *node);
|
||||
static DeferredErrorMessage * MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
|
||||
static DeferredErrorMessage * MultiShardUpdateDeleteSupported(Query *originalQuery,
|
||||
PlannerRestrictionContext
|
||||
*
|
||||
plannerRestrictionContext);
|
||||
static DeferredErrorMessage * SingleShardUpdateDeleteSupported(Query *originalQuery,
|
||||
PlannerRestrictionContext *
|
||||
plannerRestrictionContext);
|
||||
static bool HasDangerousJoinUsing(List *rtableList, Node *jtnode);
|
||||
static bool MasterIrreducibleExpression(Node *expression, bool *varArgument,
|
||||
bool *badCoalesce);
|
||||
static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state);
|
||||
static bool MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context);
|
||||
static bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column,
|
||||
FromExpr *joinTree);
|
||||
static Job * RouterInsertJob(Query *originalQuery);
|
||||
static void ErrorIfNoShardsExist(CitusTableCacheEntry *cacheEntry);
|
||||
static DeferredErrorMessage * DeferErrorIfModifyView(Query *queryTree);
|
||||
static Job * CreateJob(Query *query);
|
||||
static Task * CreateTask(TaskType taskType);
|
||||
static Job * RouterJob(Query *originalQuery,
|
||||
PlannerRestrictionContext *plannerRestrictionContext,
|
||||
DeferredErrorMessage **planningError);
|
||||
static bool RelationPrunesToMultipleShards(List *relationShardList);
|
||||
static void NormalizeMultiRowInsertTargetList(Query *query);
|
||||
static void AppendNextDummyColReference(Alias *expendedReferenceNames);
|
||||
|
@ -445,7 +438,7 @@ ModifyQueryResultRelationId(Query *query)
|
|||
 * ResultRelationOidForQuery returns the OID of the relation that is modified
 * by the given query.
|
||||
*/
|
||||
static Oid
|
||||
Oid
|
||||
ResultRelationOidForQuery(Query *query)
|
||||
{
|
||||
RangeTblEntry *resultRTE = rt_fetch(query->resultRelation, query->rtable);
|
||||
|
@ -512,6 +505,161 @@ IsTidColumn(Node *node)
|
|||
}
|
||||
|
||||
|
||||
/*
 * TargetlistAndFunctionsSupported implements a subset of what
 * ModifyPartialQuerySupported checks: which functions are allowed, whether we
 * are updating the distribution column, etc.
 * Note: This subset of checks is repeated for each MERGE modify action.
 */
|
||||
DeferredErrorMessage *
|
||||
TargetlistAndFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, Node *quals,
|
||||
List *targetList,
|
||||
CmdType commandType, List *returningList)
|
||||
{
|
||||
uint32 rangeTableId = 1;
|
||||
Var *partitionColumn = NULL;
|
||||
|
||||
if (IsCitusTable(resultRelationId))
|
||||
{
|
||||
partitionColumn = PartitionColumn(resultRelationId, rangeTableId);
|
||||
}
|
||||
|
||||
bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
|
||||
bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
|
||||
ListCell *targetEntryCell = NULL;
|
||||
|
||||
foreach(targetEntryCell, targetList)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
|
||||
/* skip resjunk entries: UPDATE adds some for ctid, etc. */
|
||||
if (targetEntry->resjunk)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
bool targetEntryPartitionColumn = false;
|
||||
AttrNumber targetColumnAttrNumber = InvalidAttrNumber;
|
||||
|
||||
/* reference tables do not have partition column */
|
||||
if (partitionColumn == NULL)
|
||||
{
|
||||
targetEntryPartitionColumn = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (commandType == CMD_UPDATE)
|
||||
{
|
||||
/*
|
||||
* Note that it is not possible to give an alias to
|
||||
* UPDATE table SET ...
|
||||
*/
|
||||
if (targetEntry->resname)
|
||||
{
|
||||
targetColumnAttrNumber = get_attnum(resultRelationId,
|
||||
targetEntry->resname);
|
||||
if (targetColumnAttrNumber == partitionColumn->varattno)
|
||||
{
|
||||
targetEntryPartitionColumn = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (commandType == CMD_UPDATE &&
|
||||
FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
|
||||
CitusIsVolatileFunction))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"functions used in UPDATE queries on distributed "
|
||||
"tables must not be VOLATILE",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (commandType == CMD_UPDATE && targetEntryPartitionColumn &&
|
||||
TargetEntryChangesValue(targetEntry, partitionColumn,
|
||||
joinTree))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"modifying the partition value of rows is not "
|
||||
"allowed",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (commandType == CMD_UPDATE &&
|
||||
MasterIrreducibleExpression((Node *) targetEntry->expr,
|
||||
&hasVarArgument, &hasBadCoalesce))
|
||||
{
|
||||
Assert(hasVarArgument || hasBadCoalesce);
|
||||
}
|
||||
|
||||
if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
|
||||
NodeIsFieldStore))
|
||||
{
|
||||
/* DELETE cannot do field indirection already */
|
||||
Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"inserting or modifying composite type fields is not "
|
||||
"supported", NULL,
|
||||
"Use the column name to insert or update the composite "
|
||||
"type as a single value");
|
||||
}
|
||||
}
|
||||
|
||||
if (joinTree != NULL)
|
||||
{
|
||||
if (FindNodeMatchingCheckFunction((Node *) quals,
|
||||
CitusIsVolatileFunction))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"functions used in the WHERE/ON/WHEN clause of modification "
|
||||
"queries on distributed tables must not be VOLATILE",
|
||||
NULL, NULL);
|
||||
}
|
||||
else if (MasterIrreducibleExpression(quals, &hasVarArgument,
|
||||
&hasBadCoalesce))
|
||||
{
|
||||
Assert(hasVarArgument || hasBadCoalesce);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasVarArgument)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"STABLE functions used in UPDATE queries "
|
||||
"cannot be called with column references",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (hasBadCoalesce)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"non-IMMUTABLE functions are not allowed in CASE or "
|
||||
"COALESCE statements",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (contain_mutable_functions((Node *) returningList))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"non-IMMUTABLE functions are not allowed in the "
|
||||
"RETURNING clause",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (quals != NULL &&
|
||||
nodeTag(quals) == T_CurrentOfExpr)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot run DML queries with cursors", NULL,
|
||||
NULL);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
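For illustration only, the kinds of statements these checks defer can be sketched with a hypothetical distributed table; the table and column names below are assumptions, not part of this change.

-- assuming: SELECT create_distributed_table('items', 'id');

-- deferred: VOLATILE function used in an UPDATE on a distributed table
UPDATE items SET price = random() WHERE id = 1;

-- deferred: modifying the partition (distribution) column value
UPDATE items SET id = 2 WHERE id = 1;

-- deferred: non-IMMUTABLE function in the RETURNING clause
DELETE FROM items WHERE id = 1 RETURNING now();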
|
||||
|
||||
|
||||
/*
|
||||
* ModifyPartialQuerySupported implements a subset of what ModifyQuerySupported checks,
|
||||
* that subset being what's necessary to check modifying CTEs for.
|
||||
|
@ -620,148 +768,21 @@ ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery,
|
|||
|
||||
Oid resultRelationId = ModifyQueryResultRelationId(queryTree);
|
||||
*distributedTableIdOutput = resultRelationId;
|
||||
uint32 rangeTableId = 1;
|
||||
|
||||
Var *partitionColumn = NULL;
|
||||
if (IsCitusTable(resultRelationId))
|
||||
{
|
||||
partitionColumn = PartitionColumn(resultRelationId, rangeTableId);
|
||||
}
|
||||
commandType = queryTree->commandType;
|
||||
if (commandType == CMD_INSERT || commandType == CMD_UPDATE ||
|
||||
commandType == CMD_DELETE)
|
||||
{
|
||||
bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
|
||||
bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
|
||||
FromExpr *joinTree = queryTree->jointree;
|
||||
ListCell *targetEntryCell = NULL;
|
||||
|
||||
foreach(targetEntryCell, queryTree->targetList)
|
||||
deferredError =
|
||||
TargetlistAndFunctionsSupported(resultRelationId,
|
||||
queryTree->jointree,
|
||||
queryTree->jointree->quals,
|
||||
queryTree->targetList,
|
||||
commandType,
|
||||
queryTree->returningList);
|
||||
if (deferredError)
|
||||
{
|
||||
TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
|
||||
|
||||
/* skip resjunk entries: UPDATE adds some for ctid, etc. */
|
||||
if (targetEntry->resjunk)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
bool targetEntryPartitionColumn = false;
|
||||
AttrNumber targetColumnAttrNumber = InvalidAttrNumber;
|
||||
|
||||
/* reference tables do not have partition column */
|
||||
if (partitionColumn == NULL)
|
||||
{
|
||||
targetEntryPartitionColumn = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (commandType == CMD_UPDATE)
|
||||
{
|
||||
/*
|
||||
* Note that it is not possible to give an alias to
|
||||
* UPDATE table SET ...
|
||||
*/
|
||||
if (targetEntry->resname)
|
||||
{
|
||||
targetColumnAttrNumber = get_attnum(resultRelationId,
|
||||
targetEntry->resname);
|
||||
if (targetColumnAttrNumber == partitionColumn->varattno)
|
||||
{
|
||||
targetEntryPartitionColumn = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (commandType == CMD_UPDATE &&
|
||||
FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
|
||||
CitusIsVolatileFunction))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"functions used in UPDATE queries on distributed "
|
||||
"tables must not be VOLATILE",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (commandType == CMD_UPDATE && targetEntryPartitionColumn &&
|
||||
TargetEntryChangesValue(targetEntry, partitionColumn,
|
||||
queryTree->jointree))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"modifying the partition value of rows is not "
|
||||
"allowed",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (commandType == CMD_UPDATE &&
|
||||
MasterIrreducibleExpression((Node *) targetEntry->expr,
|
||||
&hasVarArgument, &hasBadCoalesce))
|
||||
{
|
||||
Assert(hasVarArgument || hasBadCoalesce);
|
||||
}
|
||||
|
||||
if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
|
||||
NodeIsFieldStore))
|
||||
{
|
||||
/* DELETE cannot do field indirection already */
|
||||
Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"inserting or modifying composite type fields is not "
|
||||
"supported", NULL,
|
||||
"Use the column name to insert or update the composite "
|
||||
"type as a single value");
|
||||
}
|
||||
}
|
||||
|
||||
if (joinTree != NULL)
|
||||
{
|
||||
if (FindNodeMatchingCheckFunction((Node *) joinTree->quals,
|
||||
CitusIsVolatileFunction))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"functions used in the WHERE clause of modification "
|
||||
"queries on distributed tables must not be VOLATILE",
|
||||
NULL, NULL);
|
||||
}
|
||||
else if (MasterIrreducibleExpression(joinTree->quals, &hasVarArgument,
|
||||
&hasBadCoalesce))
|
||||
{
|
||||
Assert(hasVarArgument || hasBadCoalesce);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasVarArgument)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"STABLE functions used in UPDATE queries "
|
||||
"cannot be called with column references",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (hasBadCoalesce)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"non-IMMUTABLE functions are not allowed in CASE or "
|
||||
"COALESCE statements",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (contain_mutable_functions((Node *) queryTree->returningList))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"non-IMMUTABLE functions are not allowed in the "
|
||||
"RETURNING clause",
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (queryTree->jointree->quals != NULL &&
|
||||
nodeTag(queryTree->jointree->quals) == T_CurrentOfExpr)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot run DML queries with cursors", NULL,
|
||||
NULL);
|
||||
return deferredError;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -866,7 +887,7 @@ IsLocallyAccessibleCitusLocalTable(Oid relationId)
|
|||
/*
|
||||
* NodeIsFieldStore returns true if given Node is a FieldStore object.
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
NodeIsFieldStore(Node *node)
|
||||
{
|
||||
return node && IsA(node, FieldStore);
|
||||
|
@ -888,7 +909,9 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
|
|||
PlannerRestrictionContext *plannerRestrictionContext)
|
||||
{
|
||||
Oid distributedTableId = InvalidOid;
|
||||
DeferredErrorMessage *error = ModifyPartialQuerySupported(queryTree, multiShardQuery,
|
||||
|
||||
DeferredErrorMessage *error =
|
||||
ModifyPartialQuerySupported(queryTree, multiShardQuery,
|
||||
&distributedTableId);
|
||||
if (error)
|
||||
{
|
||||
|
@ -953,19 +976,12 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
|
|||
*/
|
||||
}
|
||||
else if (rangeTableEntry->relkind == RELKIND_MATVIEW)
|
||||
{
|
||||
if (IsMergeAllowedOnRelation(originalQuery, rangeTableEntry))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"materialized views in "
|
||||
"modify queries are not supported",
|
||||
NULL, NULL);
|
||||
}
|
||||
}
|
||||
/* for other kinds of relations, check if it's distributed */
|
||||
else
|
||||
{
|
||||
|
@ -1065,7 +1081,7 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
|
|||
|
||||
if (multiShardQuery)
|
||||
{
|
||||
errorMessage = MultiShardUpdateDeleteMergeSupported(
|
||||
errorMessage = MultiShardUpdateDeleteSupported(
|
||||
originalQuery,
|
||||
plannerRestrictionContext);
|
||||
}
|
||||
|
@ -1246,11 +1262,11 @@ ErrorIfOnConflictNotSupported(Query *queryTree)
|
|||
|
||||
|
||||
/*
|
||||
* MultiShardUpdateDeleteMergeSupported returns the error message if the update/delete is
|
||||
* MultiShardUpdateDeleteSupported returns the error message if the update/delete is
|
||||
* not pushdownable, otherwise it returns NULL.
|
||||
*/
|
||||
static DeferredErrorMessage *
|
||||
MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
|
||||
MultiShardUpdateDeleteSupported(Query *originalQuery,
|
||||
PlannerRestrictionContext *plannerRestrictionContext)
|
||||
{
|
||||
DeferredErrorMessage *errorMessage = NULL;
|
||||
|
@ -1282,7 +1298,8 @@ MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
|
|||
}
|
||||
else
|
||||
{
|
||||
errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(originalQuery,
|
||||
errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(
|
||||
originalQuery,
|
||||
plannerRestrictionContext);
|
||||
}
|
||||
|
||||
|
@ -1323,7 +1340,7 @@ SingleShardUpdateDeleteSupported(Query *originalQuery,
|
|||
* HasDangerousJoinUsing searches the join tree for unnamed JOIN USING. Check the
|
||||
* implementation of has_dangerous_join_using in ruleutils.
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
HasDangerousJoinUsing(List *rtableList, Node *joinTreeNode)
|
||||
{
|
||||
if (IsA(joinTreeNode, RangeTblRef))
|
||||
|
@ -1427,7 +1444,7 @@ IsMergeQuery(Query *query)
|
|||
* which do, but for now we just error out. That makes both the code and user-education
|
||||
* easier.
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
MasterIrreducibleExpression(Node *expression, bool *varArgument, bool *badCoalesce)
|
||||
{
|
||||
WalkerState data;
|
||||
|
@ -1575,7 +1592,7 @@ MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context)
|
|||
* expression is a value that is implied by the qualifiers of the join
|
||||
* tree, or the target entry sets a different column.
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, FromExpr *joinTree)
|
||||
{
|
||||
bool isColumnValueChanged = true;
|
||||
|
@ -1796,7 +1813,7 @@ ExtractFirstCitusTableId(Query *query)
|
|||
* RouterJob builds a Job to represent a single shard select/update/delete and
|
||||
* multiple shard update/delete queries.
|
||||
*/
|
||||
static Job *
|
||||
Job *
|
||||
RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionContext,
|
||||
DeferredErrorMessage **planningError)
|
||||
{
|
||||
|
@ -1846,8 +1863,8 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon
|
|||
if (*planningError)
|
||||
{
|
||||
/*
|
||||
* For MERGE, we do _not_ plan anything other than Router job, let's
|
||||
* not continue further down the lane in distributed planning, simply
|
||||
* For MERGE, we do _not_ plan any other router job than the MERGE job itself,
|
||||
* let's not continue further down the lane in distributed planning, simply
|
||||
* bail out.
|
||||
*/
|
||||
if (IsMergeQuery(originalQuery))
|
||||
|
@ -2320,9 +2337,20 @@ PlanRouterQuery(Query *originalQuery,
|
|||
}
|
||||
|
||||
Assert(UpdateOrDeleteOrMergeQuery(originalQuery));
|
||||
|
||||
if (IsMergeQuery(originalQuery))
|
||||
{
|
||||
planningError = MergeQuerySupported(originalQuery,
|
||||
isMultiShardQuery,
|
||||
plannerRestrictionContext);
|
||||
}
|
||||
else
|
||||
{
|
||||
planningError = ModifyQuerySupported(originalQuery, originalQuery,
|
||||
isMultiShardQuery,
|
||||
plannerRestrictionContext);
|
||||
}
|
||||
|
||||
if (planningError != NULL)
|
||||
{
|
||||
return planningError;
|
||||
|
@ -2643,7 +2671,7 @@ TargetShardIntervalForFastPathQuery(Query *query, bool *isMultiShardQuery,
|
|||
{
|
||||
Oid relationId = ExtractFirstCitusTableId(query);
|
||||
|
||||
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKey(relationId))
|
||||
{
|
||||
/* we don't need to do shard pruning for non-distributed tables */
|
||||
return list_make1(LoadShardIntervalList(relationId));
|
||||
|
@ -2936,7 +2964,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
|
|||
Assert(query->commandType == CMD_INSERT);
|
||||
|
||||
/* reference tables and citus local tables can only have one shard */
|
||||
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKeyCacheEntry(cacheEntry))
|
||||
{
|
||||
List *shardIntervalList = LoadShardIntervalList(distributedTableId);
|
||||
|
||||
|
@ -3477,7 +3505,7 @@ ExtractInsertPartitionKeyValue(Query *query)
|
|||
uint32 rangeTableId = 1;
|
||||
Const *singlePartitionValueConst = NULL;
|
||||
|
||||
if (IsCitusTableType(distributedTableId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKey(distributedTableId))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
@ -3589,6 +3617,8 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
|
|||
bool hasDistributedTable = false;
|
||||
bool hasReferenceTable = false;
|
||||
|
||||
List *distributedRelationList = NIL;
|
||||
|
||||
ExtractRangeTableRelationWalker((Node *) query, &rangeTableRelationList);
|
||||
foreach(rangeTableRelationCell, rangeTableRelationList)
|
||||
{
|
||||
|
@ -3626,6 +3656,8 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
|
|||
if (IsCitusTableType(distributedTableId, DISTRIBUTED_TABLE))
|
||||
{
|
||||
hasDistributedTable = true;
|
||||
distributedRelationList = lappend_oid(distributedRelationList,
|
||||
distributedTableId);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3680,6 +3712,15 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
|
|||
NULL, NULL);
|
||||
}
|
||||
|
||||
if (!EnableNonColocatedRouterQueryPushdown &&
|
||||
!AllDistributedRelationsInListColocated(distributedRelationList))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"router planner does not support queries that "
|
||||
"reference non-colocated distributed tables",
|
||||
NULL, NULL);
|
||||
}
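A sketch of the new check, under the assumption of two distributed tables that are not co-located (names are placeholders); with the GUC turned off, the router planner defers such queries and other planning strategies, or an error, take over.

-- assuming: create_distributed_table('orders', 'customer_id');
--           create_distributed_table('events', 'customer_id', colocate_with => 'none');
SET citus.enable_non_colocated_router_query_pushdown TO off;

-- deferred by the router planner: "router planner does not support queries
-- that reference non-colocated distributed tables"
SELECT *
FROM orders o JOIN events e ON o.customer_id = e.customer_id
WHERE o.customer_id = 42;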
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_14
|
||||
DeferredErrorMessage *CTEWithSearchClauseError =
|
||||
ErrorIfQueryHasCTEWithSearchClause(query);
|
||||
|
@ -3797,8 +3838,7 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree)
|
|||
CitusTableCacheEntry *modificationTableCacheEntry =
|
||||
GetCitusTableCacheEntry(distributedTableId);
|
||||
|
||||
if (IsCitusTableTypeCacheEntry(modificationTableCacheEntry,
|
||||
CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKeyCacheEntry(modificationTableCacheEntry))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot router plan modification of a non-distributed table",
|
||||
|
|
|
@ -168,7 +168,7 @@ AnchorRte(Query *subquery)
|
|||
{
|
||||
Oid relationId = currentRte->relid;
|
||||
|
||||
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
|
||||
{
|
||||
/*
|
||||
* Non-distributed tables should not be the anchor rte since they
|
||||
|
|
|
@ -591,10 +591,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
|||
}
|
||||
else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"complex joins are only supported when all distributed tables are "
|
||||
StringInfo errorMessage = makeStringInfo();
|
||||
bool isMergeCmd = IsMergeQuery(originalQuery);
|
||||
appendStringInfo(errorMessage,
|
||||
"%s"
|
||||
"only supported when all distributed tables are "
|
||||
"co-located and joined on their distribution columns",
|
||||
NULL, NULL);
|
||||
isMergeCmd ? "MERGE command is " : "complex joins are ");
|
||||
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
errorMessage->data, NULL, NULL);
|
||||
}
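A minimal sketch of the reworded error, assuming two distributed tables source_tbl and target_tbl that are not co-located:

-- expected to fail with: MERGE command is only supported when all distributed
-- tables are co-located and joined on their distribution columns
MERGE INTO target_tbl t
USING source_tbl s ON (t.id = s.id)
WHEN MATCHED THEN UPDATE SET val = s.val
WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);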
|
||||
|
||||
/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
|
||||
|
|
|
@ -151,7 +151,8 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass
|
|||
secondClass);
|
||||
static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex,
|
||||
Index *partitionKeyIndex);
|
||||
static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext *
|
||||
static bool AllDistributedRelationsInRestrictionContextColocated(
|
||||
RelationRestrictionContext *
|
||||
restrictionContext);
|
||||
static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node);
|
||||
static JoinRestrictionContext * FilterJoinRestrictionContext(
|
||||
|
@ -383,7 +384,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!AllRelationsInRestrictionContextColocated(restrictionContext))
|
||||
if (!AllDistributedRelationsInRestrictionContextColocated(restrictionContext))
|
||||
{
|
||||
/* distribution columns are equal, but tables are not co-located */
|
||||
return false;
|
||||
|
@ -703,8 +704,8 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
|||
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
|
||||
|
||||
/* we shouldn't check for the equality of non-distributed tables */
|
||||
if (IsCitusTableType(relationRestriction->relationId,
|
||||
CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (IsCitusTable(relationRestriction->relationId) &&
|
||||
!HasDistributionKey(relationRestriction->relationId))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -1919,22 +1920,66 @@ FindQueryContainingRTEIdentityInternal(Node *node,
|
|||
|
||||
|
||||
/*
|
||||
* AllRelationsInRestrictionContextColocated determines whether all of the relations in the
|
||||
* given relation restrictions list are co-located.
|
||||
* AllDistributedRelationsInRestrictionContextColocated determines whether all of the
|
||||
* distributed relations in the given relation restrictions list are co-located.
|
||||
*/
|
||||
static bool
|
||||
AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext)
|
||||
AllDistributedRelationsInRestrictionContextColocated(
|
||||
RelationRestrictionContext *restrictionContext)
|
||||
{
|
||||
RelationRestriction *relationRestriction = NULL;
|
||||
int initialColocationId = INVALID_COLOCATION_ID;
|
||||
List *relationIdList = NIL;
|
||||
|
||||
/* check whether all relations exists in the main restriction list */
|
||||
foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList)
|
||||
{
|
||||
Oid relationId = relationRestriction->relationId;
|
||||
relationIdList = lappend_oid(relationIdList, relationRestriction->relationId);
|
||||
}
|
||||
|
||||
if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
return AllDistributedRelationsInListColocated(relationIdList);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AllDistributedRelationsInRTEListColocated determines whether all of the
|
||||
* distributed relations in the given RangeTableEntry list are co-located.
|
||||
*/
|
||||
bool
|
||||
AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList)
|
||||
{
|
||||
RangeTblEntry *rangeTableEntry = NULL;
|
||||
List *relationIdList = NIL;
|
||||
|
||||
foreach_ptr(rangeTableEntry, rangeTableEntryList)
|
||||
{
|
||||
relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid);
|
||||
}
|
||||
|
||||
return AllDistributedRelationsInListColocated(relationIdList);
|
||||
}
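As a point of reference, the colocation these helpers test for is the one established at table creation time; the setup below is a hypothetical example, and plain Postgres tables as well as reference tables are skipped by the check.

SELECT create_distributed_table('accounts', 'tenant_id');
SELECT create_distributed_table('invoices', 'tenant_id', colocate_with => 'accounts');
-- a reference table in the same list does not affect the result
SELECT create_reference_table('currencies');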
|
||||
|
||||
|
||||
/*
|
||||
* AllDistributedRelationsInListColocated determines whether all of the
|
||||
* distributed relations in the given list are co-located.
|
||||
*/
|
||||
bool
|
||||
AllDistributedRelationsInListColocated(List *relationList)
|
||||
{
|
||||
int initialColocationId = INVALID_COLOCATION_ID;
|
||||
Oid relationId = InvalidOid;
|
||||
|
||||
foreach_oid(relationId, relationList)
|
||||
{
|
||||
if (!IsCitusTable(relationId))
|
||||
{
|
||||
/* not interested in Postgres tables */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!IsCitusTableType(relationId, DISTRIBUTED_TABLE))
|
||||
{
|
||||
/* not interested in non-distributed tables */
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -333,7 +333,7 @@ PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList,
|
|||
}
|
||||
|
||||
/* short circuit for non-distributed tables such as reference table */
|
||||
if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (!HasDistributionKeyCacheEntry(cacheEntry))
|
||||
{
|
||||
prunedList = ShardArrayToList(cacheEntry->sortedShardIntervalArray,
|
||||
cacheEntry->shardIntervalArrayLength);
|
||||
|
|
|
@ -88,6 +88,8 @@ static const char *replicationSlotPrefix[] = {
|
|||
* IMPORTANT: All the subscription names should start with "citus_". Otherwise
|
||||
* our utility hook does not defend against non-superusers altering or dropping
|
||||
* them, which is important for security purposes.
|
||||
*
|
||||
* We should also keep these in sync with IsCitusShardTransferBackend().
|
||||
*/
|
||||
static const char *subscriptionPrefix[] = {
|
||||
[SHARD_MOVE] = "citus_shard_move_subscription_",
|
||||
|
@ -1338,7 +1340,9 @@ CreatePublications(MultiConnection *connection,
|
|||
worker->groupId,
|
||||
CLEANUP_ALWAYS);
|
||||
|
||||
ExecuteCriticalRemoteCommand(connection, DISABLE_DDL_PROPAGATION);
|
||||
ExecuteCriticalRemoteCommand(connection, createPublicationCommand->data);
|
||||
ExecuteCriticalRemoteCommand(connection, ENABLE_DDL_PROPAGATION);
|
||||
pfree(createPublicationCommand->data);
|
||||
pfree(createPublicationCommand);
|
||||
}
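On the worker, the effect is roughly the command sequence below; the publication and shard names are placeholders, and the SET statements stand in for the existing DISABLE_DDL_PROPAGATION / ENABLE_DDL_PROPAGATION helpers.

SET citus.enable_ddl_propagation TO 'off';
CREATE PUBLICATION citus_shard_move_publication_example FOR TABLE my_table_102008;
SET citus.enable_ddl_propagation TO 'on';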
|
||||
|
|
|
@ -10,18 +10,27 @@
|
|||
#include "postgres.h"
|
||||
#include "distributed/shardinterval_utils.h"
|
||||
#include "distributed/shardsplit_shared_memory.h"
|
||||
#include "distributed/worker_shard_visibility.h"
|
||||
#include "distributed/worker_protocol.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/metadata/distobject.h"
|
||||
#include "replication/logical.h"
|
||||
#include "utils/typcache.h"
|
||||
|
||||
#include "utils/lsyscache.h"
|
||||
#include "catalog/pg_namespace.h"
|
||||
|
||||
extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
|
||||
static LogicalDecodeChangeCB pgoutputChangeCB;
|
||||
static LogicalDecodeChangeCB pgOutputPluginChangeCB;
|
||||
|
||||
#define InvalidRepOriginId 0
|
||||
|
||||
static HTAB *SourceToDestinationShardMap = NULL;
|
||||
static bool replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId
|
||||
origin_id);
|
||||
|
||||
/* Plugin callback */
|
||||
static void split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
||||
static void shard_split_change_cb(LogicalDecodingContext *ctx,
|
||||
ReorderBufferTXN *txn,
|
||||
Relation relation, ReorderBufferChange *change);
|
||||
|
||||
/* Helper methods */
|
||||
|
@ -47,7 +56,8 @@ void
|
|||
_PG_output_plugin_init(OutputPluginCallbacks *cb)
|
||||
{
|
||||
LogicalOutputPluginInit plugin_init =
|
||||
(LogicalOutputPluginInit) (void *) load_external_function("pgoutput",
|
||||
(LogicalOutputPluginInit) (void *)
|
||||
load_external_function("pgoutput",
|
||||
"_PG_output_plugin_init",
|
||||
false, NULL);
|
||||
|
||||
|
@ -60,25 +70,56 @@ _PG_output_plugin_init(OutputPluginCallbacks *cb)
|
|||
plugin_init(cb);
|
||||
|
||||
/* actual pgoutput callback will be called with the appropriate destination shard */
|
||||
pgoutputChangeCB = cb->change_cb;
|
||||
cb->change_cb = split_change_cb;
|
||||
pgOutputPluginChangeCB = cb->change_cb;
|
||||
cb->change_cb = shard_split_change_cb;
|
||||
cb->filter_by_origin_cb = replication_origin_filter_cb;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* split_change function emits the incoming tuple change
|
||||
* replication_origin_filter_cb callback function filters out publication of changes
|
||||
* originating from any node other than the current node. This is
|
||||
* identified by the "origin_id" of the changes. The origin_id is set to
|
||||
* a non-zero value in the origin node as part of WAL replication for internal
|
||||
* operations like shard split/moves/create_distributed_table etc.
|
||||
*/
|
||||
static bool
|
||||
replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId origin_id)
|
||||
{
|
||||
return (origin_id != InvalidRepOriginId);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* shard_split_change_cb function emits the incoming tuple change
|
||||
* to the appropriate destination shard.
|
||||
*/
|
||||
static void
|
||||
split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
||||
shard_split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
||||
Relation relation, ReorderBufferChange *change)
|
||||
{
|
||||
/*
|
||||
* If Citus has not been loaded yet, pass the changes
|
||||
* through to the underlying decoder plugin.
|
||||
*/
|
||||
if (!CitusHasBeenLoaded())
|
||||
{
|
||||
pgOutputPluginChangeCB(ctx, txn, relation, change);
|
||||
return;
|
||||
}
|
||||
|
||||
/* check if the relation is publishable.*/
|
||||
if (!is_publishable_relation(relation))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
char *replicationSlotName = ctx->slot->data.name.data;
|
||||
if (replicationSlotName == NULL)
|
||||
{
|
||||
elog(ERROR, "Replication slot name is NULL!");
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize SourceToDestinationShardMap if not already initialized.
|
||||
|
@ -198,7 +239,7 @@ split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
|||
}
|
||||
}
|
||||
|
||||
pgoutputChangeCB(ctx, txn, targetRelation, change);
|
||||
pgOutputPluginChangeCB(ctx, txn, targetRelation, change);
|
||||
RelationClose(targetRelation);
|
||||
}
|
||||
|
||||
|
|
|
@ -74,6 +74,7 @@
|
|||
#include "distributed/recursive_planning.h"
|
||||
#include "distributed/reference_table_utils.h"
|
||||
#include "distributed/relation_access_tracking.h"
|
||||
#include "distributed/replication_origin_session_utils.h"
|
||||
#include "distributed/run_from_same_connection.h"
|
||||
#include "distributed/shard_cleaner.h"
|
||||
#include "distributed/shard_transfer.h"
|
||||
|
@ -135,6 +136,8 @@ ReadColumnarOptions_type extern_ReadColumnarOptions = NULL;
|
|||
CppConcat(extern_, funcname) = \
|
||||
(typename) (void *) lookup_external_function(handle, # funcname)
|
||||
|
||||
#define CDC_DECODER_DYNAMIC_LIB_PATH "$libdir/citus_decoders:$libdir"
|
||||
|
||||
DEFINE_COLUMNAR_PASSTHROUGH_FUNC(columnar_handler)
|
||||
DEFINE_COLUMNAR_PASSTHROUGH_FUNC(alter_columnar_table_set)
|
||||
DEFINE_COLUMNAR_PASSTHROUGH_FUNC(alter_columnar_table_reset)
|
||||
|
@ -206,7 +209,7 @@ static bool StatisticsCollectionGucCheckHook(bool *newval, void **extra, GucSour
|
|||
source);
|
||||
static void CitusAuthHook(Port *port, int status);
|
||||
static bool IsSuperuser(char *userName);
|
||||
|
||||
static void AdjustDynamicLibraryPathForCdcDecoders(void);
|
||||
|
||||
static ClientAuthentication_hook_type original_client_auth_hook = NULL;
|
||||
|
||||
|
@ -359,6 +362,11 @@ static const struct config_enum_entry cpu_priority_options[] = {
|
|||
{ NULL, 0, false}
|
||||
};
|
||||
|
||||
static const struct config_enum_entry metadata_sync_mode_options[] = {
|
||||
{ "transactional", METADATA_SYNC_TRANSACTIONAL, false },
|
||||
{ "nontransactional", METADATA_SYNC_NON_TRANSACTIONAL, false },
|
||||
{ NULL, 0, false }
|
||||
};
|
||||
|
||||
/* *INDENT-ON* */
|
||||
|
||||
|
@ -469,6 +477,17 @@ _PG_init(void)
|
|||
InitializeLocallyReservedSharedConnections();
|
||||
InitializeClusterClockMem();
|
||||
|
||||
/*
|
||||
* Adjust the dynamic library path to prepend citus_decoders to the dynamic
|
||||
* library path. This is needed to make sure that the citus decoders are
|
||||
* loaded before the default decoders for CDC.
|
||||
*/
|
||||
if (EnableChangeDataCapture)
|
||||
{
|
||||
AdjustDynamicLibraryPathForCdcDecoders();
|
||||
}
|
||||
|
||||
|
||||
/* initialize shard split shared memory handle management */
|
||||
InitializeShardSplitSMHandleManagement();
|
||||
|
||||
|
@ -536,6 +555,22 @@ _PG_init(void)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* AdjustDynamicLibraryPathForCdcDecoders prepends $libdir/citus_decoders
|
||||
* to the dynamic library path. This is needed to make sure that the citus
|
||||
* decoders are loaded before the default decoders for CDC.
|
||||
*/
|
||||
static void
|
||||
AdjustDynamicLibraryPathForCdcDecoders(void)
|
||||
{
|
||||
if (strcmp(Dynamic_library_path, "$libdir") == 0)
|
||||
{
|
||||
SetConfigOption("dynamic_library_path", CDC_DECODER_DYNAMIC_LIB_PATH,
|
||||
PGC_POSTMASTER, PGC_S_OVERRIDE);
|
||||
}
|
||||
}
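Assuming the server started with the default dynamic_library_path and change data capture enabled, the resulting setting would look like this sketch:

SHOW dynamic_library_path;
-- expected value: $libdir/citus_decoders:$libdir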
|
||||
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_15
|
||||
|
||||
/*
|
||||
|
@ -1132,6 +1167,16 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.enable_change_data_capture",
|
||||
gettext_noop("Enables using replication origin tracking for change data capture"),
|
||||
NULL,
|
||||
&EnableChangeDataCapture,
|
||||
false,
|
||||
PGC_USERSET,
|
||||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.enable_cluster_clock",
|
||||
gettext_noop("When users explicitly call UDF citus_get_transaction_clock() "
|
||||
|
@ -1268,6 +1313,26 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.enable_non_colocated_router_query_pushdown",
|
||||
gettext_noop("Enables router planner for the queries that reference "
|
||||
"non-colocated distributed tables."),
|
||||
gettext_noop("Normally, router planner planner is only enabled for "
|
||||
"the queries that reference colocated distributed tables "
|
||||
"because it is not guaranteed to have the target shards "
|
||||
"always on the same node, e.g., after rebalancing the "
|
||||
"shards. For this reason, while enabling this flag allows "
|
||||
"some degree of optimization for the queries that reference "
|
||||
"non-colocated distributed tables, it is not guaranteed "
|
||||
"that the same query will work after rebalancing the shards "
|
||||
"or altering the shard count of one of those distributed "
|
||||
"tables."),
|
||||
&EnableNonColocatedRouterQueryPushdown,
|
||||
true,
|
||||
PGC_USERSET,
|
||||
GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"citus.enable_repartition_joins",
|
||||
gettext_noop("Allows Citus to repartition data between nodes."),
|
||||
|
@ -1849,6 +1914,21 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_UNIT_MS | GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomEnumVariable(
|
||||
"citus.metadata_sync_mode",
|
||||
gettext_noop("Sets transaction mode for metadata syncs."),
|
||||
gettext_noop("metadata sync can be run inside a single coordinated "
|
||||
"transaction or with multiple small transactions in "
|
||||
"idempotent way. By default we sync metadata in single "
|
||||
"coordinated transaction. When we hit memory problems "
|
||||
"at workers, we have alternative nontransactional mode "
|
||||
"where we send each command with separate transaction."),
|
||||
&MetadataSyncTransMode,
|
||||
METADATA_SYNC_TRANSACTIONAL, metadata_sync_mode_options,
|
||||
PGC_SUSET,
|
||||
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
|
||||
NULL, NULL, NULL);
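A hedged usage sketch for the new setting; it is superuser-only and typically paired with re-running the metadata sync (the worker name and port below are assumptions).

SET citus.metadata_sync_mode TO 'nontransactional';
SELECT start_metadata_sync_to_node('worker-1', 5432);
RESET citus.metadata_sync_mode;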
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"citus.metadata_sync_retry_interval",
|
||||
gettext_noop("Sets the interval to retry failed metadata syncs."),
|
||||
|
@ -2406,7 +2486,6 @@ RegisterCitusConfigVariables(void)
|
|||
GUC_STANDARD,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
|
||||
/* warn about config items in the citus namespace that are not registered above */
|
||||
EmitWarningsOnPlaceholders("citus");
|
||||
|
||||
|
|
|
@ -1,4 +1,12 @@
|
|||
-- citus--11.2-1--11.3-1
|
||||
#include "udfs/repl_origin_helper/11.3-1.sql"
|
||||
#include "udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql"
|
||||
ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY USING INDEX pg_dist_authinfo_identification_index;
|
||||
ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY USING INDEX pg_dist_partition_logical_relid_index;
|
||||
ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY USING INDEX pg_dist_placement_placementid_index;
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY USING INDEX pg_dist_rebalance_strategy_name_key;
|
||||
ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY USING INDEX pg_dist_shard_shardid_index;
|
||||
ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY USING INDEX pg_dist_transaction_unique_constraint;
|
||||
|
||||
-- bump version to 11.3-1
|
||||
|
||||
#include "udfs/worker_drop_all_shell_tables/11.3-1.sql"
|
||||
#include "udfs/citus_internal_mark_node_not_synced/11.3-1.sql"
|
||||
|
|
|
@ -1,2 +1,22 @@
|
|||
-- citus--11.3-1--11.2-1
|
||||
-- this is an empty downgrade path since citus--11.2-1--11.3-1.sql is empty for now
|
||||
|
||||
DROP FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking();
|
||||
DROP FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking();
|
||||
DROP FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active();
|
||||
DROP FUNCTION IF EXISTS pg_catalog.worker_adjust_identity_column_seq_ranges(regclass);
|
||||
ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING;
|
||||
|
||||
ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING;
|
||||
ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING;
|
||||
|
||||
DROP PROCEDURE pg_catalog.worker_drop_all_shell_tables(bool);
|
||||
DROP FUNCTION pg_catalog.citus_internal_mark_node_not_synced(int, int);
|
||||
|
|
src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/11.3-1.sql (generated, new file, 6 lines)
|
@ -0,0 +1,6 @@
|
|||
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_mark_node_not_synced(parent_pid int, nodeid int)
|
||||
RETURNS VOID
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_internal_mark_node_not_synced$$;
|
||||
COMMENT ON FUNCTION citus_internal_mark_node_not_synced(int, int)
|
||||
IS 'marks given node not synced by unsetting metadatasynced column at the start of the nontransactional sync.';
|
|
|
|
@ -0,0 +1,20 @@
|
|||
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking()
|
||||
RETURNS void
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_internal_start_replication_origin_tracking$$;
|
||||
COMMENT ON FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking()
|
||||
IS 'To start replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC';
|
||||
|
||||
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking()
|
||||
RETURNS void
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_internal_stop_replication_origin_tracking$$;
|
||||
COMMENT ON FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking()
|
||||
IS 'To stop replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC';
|
||||
|
||||
CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active()
|
||||
RETURNS boolean
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_internal_is_replication_origin_tracking_active$$;
|
||||
COMMENT ON FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active()
|
||||
IS 'To check if replication origin tracking is active for skipping publishing of duplicated events during internal data movements for CDC';
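Taken together, the intended calling pattern is roughly the sketch below; these are internal functions used around Citus' own data movements rather than a user-facing API.

SELECT citus_internal_start_replication_origin_tracking();
-- internal data movement commands run here and are filtered out by CDC decoders
SELECT citus_internal_is_replication_origin_tracking_active();  -- true
SELECT citus_internal_stop_replication_origin_tracking();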
|
|
|
src/backend/distributed/sql/udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql (generated, new file, 7 lines)
|
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
CREATE OR REPLACE FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass)
|
||||
RETURNS VOID
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$worker_adjust_identity_column_seq_ranges$$;
|
||||
COMMENT ON FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass)
|
||||
IS 'modify identity column seq ranges to produce globally unique values';
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
-- During metadata sync, when we send many DDLs over a single transaction, the worker node can error due
|
||||
-- to reaching the max allocation block size for invalidation messages. As a workaround,
|
||||
-- we added a nontransactional metadata sync mode where we use many transactions while dropping shell tables
|
||||
-- via https://github.com/citusdata/citus/pull/6728.
|
||||
CREATE OR REPLACE PROCEDURE pg_catalog.worker_drop_all_shell_tables(singleTransaction bool DEFAULT true)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
DECLARE
|
||||
table_name text;
|
||||
BEGIN
|
||||
-- drop shell tables within single or multiple transactions according to the flag singleTransaction
|
||||
FOR table_name IN SELECT logicalrelid::regclass::text FROM pg_dist_partition
|
||||
LOOP
|
||||
PERFORM pg_catalog.worker_drop_shell_table(table_name);
|
||||
IF not singleTransaction THEN
|
||||
COMMIT;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END;
|
||||
$$;
|
||||
COMMENT ON PROCEDURE worker_drop_all_shell_tables(singleTransaction bool)
|
||||
IS 'drop all distributed tables only without the metadata within single transaction or '
|
||||
'multiple transaction specified by the flag singleTransaction';
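A usage sketch: because the procedure may COMMIT internally, the nontransactional form has to be called outside an explicit transaction block.

-- drop all shell tables in a single transaction (default)
CALL pg_catalog.worker_drop_all_shell_tables();
-- or commit after each dropped shell table, as the nontransactional sync mode does
CALL pg_catalog.worker_drop_all_shell_tables(false);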
|
|
|
@ -49,26 +49,23 @@ activate_node_snapshot(PG_FUNCTION_ARGS)
|
|||
*/
|
||||
WorkerNode *dummyWorkerNode = GetFirstPrimaryWorkerNode();
|
||||
|
||||
List *updateLocalGroupCommand =
|
||||
list_make1(LocalGroupIdUpdateCommand(dummyWorkerNode->groupId));
|
||||
List *syncDistObjCommands = SyncDistributedObjectsCommandList(dummyWorkerNode);
|
||||
List *dropSnapshotCommands = NodeMetadataDropCommands();
|
||||
List *createSnapshotCommands = NodeMetadataCreateCommands();
|
||||
List *pgDistTableMetadataSyncCommands = PgDistTableMetadataSyncCommandList();
|
||||
/*
|
||||
* Create MetadataSyncContext which is used throughout nodes' activation.
|
||||
* As we set collectCommands to true, it does not open connections to workers.
|
||||
* Instead, it collects and returns the sync commands to be sent to workers.
|
||||
*/
|
||||
bool collectCommands = true;
|
||||
bool nodesAddedInSameTransaction = false;
|
||||
MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(dummyWorkerNode),
|
||||
collectCommands,
|
||||
nodesAddedInSameTransaction);
|
||||
|
||||
List *activateNodeCommandList = NIL;
|
||||
ActivateNodeList(context);
|
||||
|
||||
List *activateNodeCommandList = context->collectedCommands;
|
||||
int activateNodeCommandIndex = 0;
|
||||
Oid ddlCommandTypeId = TEXTOID;
|
||||
|
||||
activateNodeCommandList = list_concat(activateNodeCommandList,
|
||||
updateLocalGroupCommand);
|
||||
activateNodeCommandList = list_concat(activateNodeCommandList, syncDistObjCommands);
|
||||
activateNodeCommandList = list_concat(activateNodeCommandList, dropSnapshotCommands);
|
||||
activateNodeCommandList = list_concat(activateNodeCommandList,
|
||||
createSnapshotCommands);
|
||||
activateNodeCommandList = list_concat(activateNodeCommandList,
|
||||
pgDistTableMetadataSyncCommands);
|
||||
|
||||
int activateNodeCommandCount = list_length(activateNodeCommandList);
|
||||
Datum *activateNodeCommandDatumArray = palloc0(activateNodeCommandCount *
|
||||
sizeof(Datum));
|
||||
|
|
|
@ -147,6 +147,26 @@ shard_placement_rebalance_array(PG_FUNCTION_ARGS)
|
|||
shardPlacementList = SortList(shardPlacementList, CompareShardPlacements);
|
||||
shardPlacementListList = lappend(shardPlacementListList, shardPlacementList);
|
||||
|
||||
List *unbalancedShards = NIL;
|
||||
ListCell *shardPlacementListCell = NULL;
|
||||
foreach(shardPlacementListCell, shardPlacementListList)
|
||||
{
|
||||
List *placementList = (List *) lfirst(shardPlacementListCell);
|
||||
|
||||
if (list_length(placementList) < list_length(workerNodeList))
|
||||
{
|
||||
unbalancedShards = list_concat(unbalancedShards,
|
||||
placementList);
|
||||
shardPlacementListList = foreach_delete_current(shardPlacementListList,
|
||||
shardPlacementListCell);
|
||||
}
|
||||
}
|
||||
|
||||
if (list_length(unbalancedShards) > 0)
|
||||
{
|
||||
shardPlacementListList = lappend(shardPlacementListList, unbalancedShards);
|
||||
}
|
||||
|
||||
rebalancePlanFunctions.context = &context;
|
||||
|
||||
/* sort the lists to make the function more deterministic */
|
||||
|
|
|
@ -1270,23 +1270,6 @@ MyBackendGotCancelledDueToDeadlock(bool clearState)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* MyBackendIsInDisributedTransaction returns true if MyBackendData
|
||||
* is in a distributed transaction.
|
||||
*/
|
||||
bool
|
||||
MyBackendIsInDisributedTransaction(void)
|
||||
{
|
||||
/* backend might not have used citus yet and thus not initialized backend data */
|
||||
if (!MyBackendData)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return IsInDistributedTransaction(MyBackendData);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ActiveDistributedTransactionNumbers returns a list of pointers to
|
||||
* transaction numbers of distributed transactions that are in progress
|
||||
|
@ -1452,6 +1435,21 @@ IsExternalClientBackend(void)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsCitusShardTransferBackend returns true if we are in a backend that a Citus
|
||||
* shard transfer initiated.
|
||||
*/
|
||||
bool
|
||||
IsCitusShardTransferBackend(void)
|
||||
{
|
||||
int prefixLength = strlen(CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX);
|
||||
|
||||
return strncmp(application_name,
|
||||
CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX,
|
||||
prefixLength) == 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DetermineCitusBackendType determines the type of backend based on the application_name.
|
||||
*/
|
||||
|
|
|
@ -195,7 +195,7 @@ RecordRelationAccessIfNonDistTable(Oid relationId, ShardPlacementAccessType acce
|
|||
* recursively calling RecordRelationAccessBase(), so be careful about
|
||||
* removing this check.
|
||||
*/
|
||||
if (!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (IsCitusTable(relationId) && HasDistributionKey(relationId))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -732,8 +732,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access
|
|||
|
||||
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
|
||||
|
||||
if (!(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY) &&
|
||||
cacheEntry->referencingRelationsViaForeignKey != NIL))
|
||||
if (HasDistributionKeyCacheEntry(cacheEntry) ||
|
||||
cacheEntry->referencingRelationsViaForeignKey == NIL)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -931,7 +931,7 @@ HoldsConflictingLockWithReferencedRelations(Oid relationId, ShardPlacementAccess
|
|||
* We're only interested in foreign keys to reference tables and citus
|
||||
* local tables.
|
||||
*/
|
||||
if (!IsCitusTableType(referencedRelation, CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
if (IsCitusTable(referencedRelation) && HasDistributionKey(referencedRelation))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -993,7 +993,7 @@ HoldsConflictingLockWithReferencingRelations(Oid relationId, ShardPlacementAcces
|
|||
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
|
||||
bool holdsConflictingLocks = false;
|
||||
|
||||
Assert(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY));
|
||||
Assert(!HasDistributionKeyCacheEntry(cacheEntry));
|
||||
|
||||
Oid referencingRelation = InvalidOid;
|
||||
foreach_oid(referencingRelation, cacheEntry->referencingRelationsViaForeignKey)
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "distributed/multi_logical_replication.h"
|
||||
#include "distributed/multi_explain.h"
|
||||
#include "distributed/repartition_join_execution.h"
|
||||
#include "distributed/replication_origin_session_utils.h"
|
||||
#include "distributed/transaction_management.h"
|
||||
#include "distributed/placement_connection.h"
|
||||
#include "distributed/relation_access_tracking.h"
|
||||
|
@ -391,6 +392,9 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
|
|||
ResetGlobalVariables();
|
||||
ResetRelationAccessHash();
|
||||
|
||||
/* Reset any local replication origin session since transaction has been aborted.*/
|
||||
ResetReplicationOriginLocalSession();
|
||||
|
||||
/* empty the CitusXactCallbackContext to ensure we're not leaking memory */
|
||||
MemoryContextReset(CitusXactCallbackContext);
|
||||
|
||||
|
@ -715,6 +719,8 @@ CoordinatedSubTransactionCallback(SubXactEvent event, SubTransactionId subId,
|
|||
SetCreateCitusTransactionLevel(0);
|
||||
}
|
||||
|
||||
/* Reset any local replication origin session since subtransaction has been aborted.*/
|
||||
ResetReplicationOriginLocalSession();
|
||||
MemoryContextSwitchTo(previousContext);
|
||||
|
||||
break;
|
||||
|
|
|
@ -374,6 +374,54 @@ SendCommandListToWorkerOutsideTransactionWithConnection(MultiConnection *workerC
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* SendCommandListToWorkerListWithBareConnections sends the command list
|
||||
* over the specified bare connections. This function is mainly useful to
|
||||
* avoid opening and closing connections excessively by reusing
|
||||
* connections to send multiple separate bare commands. The function
|
||||
* raises an error if any of the queries fail.
|
||||
*/
|
||||
void
|
||||
SendCommandListToWorkerListWithBareConnections(List *workerConnectionList,
|
||||
List *commandList)
|
||||
{
|
||||
Assert(!InCoordinatedTransaction());
|
||||
Assert(!GetCoordinatedTransactionShouldUse2PC());
|
||||
|
||||
if (list_length(commandList) == 0 || list_length(workerConnectionList) == 0)
|
||||
{
|
||||
/* nothing to do */
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* In order to avoid round-trips per query in queryStringList,
|
||||
* we join the string and send as a single command. Also,
|
||||
* if there is only a single command, avoid additional call to
|
||||
* StringJoin given that some strings can be quite large.
|
||||
*/
|
||||
char *stringToSend = (list_length(commandList) == 1) ?
|
||||
linitial(commandList) : StringJoin(commandList, ';');
|
||||
|
||||
/* send commands in parallel */
|
||||
MultiConnection *connection = NULL;
|
||||
foreach_ptr(connection, workerConnectionList)
|
||||
{
|
||||
int querySent = SendRemoteCommand(connection, stringToSend);
|
||||
if (querySent == 0)
|
||||
{
|
||||
ReportConnectionError(connection, ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
bool failOnError = true;
|
||||
foreach_ptr(connection, workerConnectionList)
|
||||
{
|
||||
ClearResults(connection, failOnError);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SendCommandListToWorkerInCoordinatedTransaction opens connection to the node
|
||||
* with the given nodeName and nodePort. The commands are sent as part of the
|
||||
|
@ -390,6 +438,8 @@ SendMetadataCommandListToWorkerListInCoordinatedTransaction(List *workerNodeList
|
|||
return;
|
||||
}
|
||||
|
||||
ErrorIfAnyMetadataNodeOutOfSync(workerNodeList);
|
||||
|
||||
UseCoordinatedTransaction();
|
||||
|
||||
List *connectionList = NIL;
|
||||
|
|
|
@ -442,8 +442,7 @@ ShardsIntervalsEqual(ShardInterval *leftShardInterval, ShardInterval *rightShard
|
|||
{
|
||||
return HashPartitionedShardIntervalsEqual(leftShardInterval, rightShardInterval);
|
||||
}
|
||||
else if (IsCitusTableType(leftShardInterval->relationId,
|
||||
CITUS_TABLE_WITH_NO_DIST_KEY))
|
||||
else if (!HasDistributionKey(leftShardInterval->relationId))
|
||||
{
|
||||
/*
|
||||
* Reference tables has only a single shard and all reference tables
|
||||
|
|
|
@ -503,12 +503,11 @@ GetReferenceTableColocationId()
|
|||
|
||||
|
||||
/*
|
||||
* DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over
|
||||
* list of reference and replicated hash distributed tables and deletes
|
||||
* all placements from pg_dist_placement table for given group.
|
||||
* GetAllReplicatedTableList returns all tables that have replicated placements,
|
||||
* i.e. (all reference tables) + (distributed tables with more than one placement).
|
||||
*/
|
||||
void
|
||||
DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
|
||||
List *
|
||||
GetAllReplicatedTableList(void)
|
||||
{
|
||||
List *referenceTableList = CitusTableTypeIdList(REFERENCE_TABLE);
|
||||
List *replicatedMetadataSyncedDistributedTableList =
|
||||
|
@ -517,13 +516,25 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
|
|||
List *replicatedTableList =
|
||||
list_concat(referenceTableList, replicatedMetadataSyncedDistributedTableList);
|
||||
|
||||
/* if there are no reference tables, we do not need to do anything */
|
||||
if (list_length(replicatedTableList) == 0)
|
||||
{
|
||||
return;
|
||||
return replicatedTableList;
|
||||
}
|
||||
|
||||
StringInfo deletePlacementCommand = makeStringInfo();
|
||||
|
||||
/*
|
||||
* ReplicatedPlacementsForNodeGroup filters all replicated placements for the given
|
||||
* node group id.
|
||||
*/
|
||||
List *
|
||||
ReplicatedPlacementsForNodeGroup(int32 groupId)
|
||||
{
|
||||
List *replicatedTableList = GetAllReplicatedTableList();
|
||||
|
||||
if (list_length(replicatedTableList) == 0)
|
||||
{
|
||||
return NIL;
|
||||
}
|
||||
|
||||
List *replicatedPlacementsForNodeGroup = NIL;
|
||||
Oid replicatedTableId = InvalidOid;
|
||||
foreach_oid(replicatedTableId, replicatedTableList)
|
||||
{
|
||||
|
@ -538,25 +549,104 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
            continue;
        }

        replicatedPlacementsForNodeGroup = list_concat(replicatedPlacementsForNodeGroup,
                                                       placements);
    }

    return replicatedPlacementsForNodeGroup;
}


/*
 * DeleteShardPlacementCommand returns a command for deleting given placement from
 * metadata.
 */
char *
DeleteShardPlacementCommand(uint64 placementId)
{
    StringInfo deletePlacementCommand = makeStringInfo();
    appendStringInfo(deletePlacementCommand,
                     "DELETE FROM pg_catalog.pg_dist_placement "
                     "WHERE placementid = " UINT64_FORMAT, placementId);
    return deletePlacementCommand->data;
}
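For reference, the helper above only builds a string: what it returns is plain SQL against the Citus metadata catalog. A minimal sketch of the generated command, using a made-up placement id:

-- Sketch of the command DeleteShardPlacementCommand() builds; placement id
-- 102008 is hypothetical and would normally come from pg_dist_placement.
DELETE FROM pg_catalog.pg_dist_placement WHERE placementid = 102008;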
/*
 * DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over
 * list of reference and replicated hash distributed tables and deletes
 * all placements from pg_dist_placement table for given group.
 */
void
DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
{
    List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId);

    /* if there are no replicated tables for the group, we do not need to do anything */
    if (list_length(replicatedPlacementListForGroup) == 0)
    {
        return;
    }

    GroupShardPlacement *placement = NULL;
    foreach_ptr(placement, placements)
    foreach_ptr(placement, replicatedPlacementListForGroup)
    {
        LockShardDistributionMetadata(placement->shardId, ExclusiveLock);

        if (!localOnly)
        {
            char *deletePlacementCommand =
                DeleteShardPlacementCommand(placement->placementId);

            SendCommandToWorkersWithMetadata(deletePlacementCommand);
        }

        DeleteShardPlacementRow(placement->placementId);
    }
}


/*
 * DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext does the same as
 * DeleteAllReplicatedTablePlacementsFromNodeGroup except it uses metadataSyncContext for
 * connections.
 */
void
DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext(
    MetadataSyncContext *context, int32 groupId, bool localOnly)
{
    List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId);

    /* if there are no replicated tables for the group, we do not need to do anything */
    if (list_length(replicatedPlacementListForGroup) == 0)
    {
        return;
    }

    MemoryContext oldContext = MemoryContextSwitchTo(context->context);
    GroupShardPlacement *placement = NULL;
    foreach_ptr(placement, replicatedPlacementListForGroup)
    {
        LockShardDistributionMetadata(placement->shardId, ExclusiveLock);

        if (!localOnly)
        {
            resetStringInfo(deletePlacementCommand);
            appendStringInfo(deletePlacementCommand,
                             "DELETE FROM pg_catalog.pg_dist_placement "
                             "WHERE placementid = " UINT64_FORMAT,
                             placement->placementId);
            char *deletePlacementCommand =
                DeleteShardPlacementCommand(placement->placementId);

            SendCommandToWorkersWithMetadata(deletePlacementCommand->data);
            SendOrCollectCommandListToMetadataNodes(context,
                                                    list_make1(deletePlacementCommand));
        }

        /* do not execute local transaction if we collect commands */
        if (!MetadataSyncCollectsCommands(context))
        {
            DeleteShardPlacementRow(placement->placementId);
        }

        ResetMetadataSyncMemoryContext(context);
    }
    MemoryContextSwitchTo(oldContext);
}
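As a rough, assumption-laden illustration of what ReplicatedPlacementsForNodeGroup gathers, the placements recorded for one node group can be listed straight from the metadata catalogs. This sketch skips the reference-table / replicated-and-metadata-synced filtering that the C code applies, and the group id 2 is made up:

-- Approximate sketch only: placements stored for node group 2.
SELECT p.placementid, s.logicalrelid, p.shardid
FROM pg_catalog.pg_dist_placement p
JOIN pg_catalog.pg_dist_shard s USING (shardid)
WHERE p.groupid = 2;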
@ -0,0 +1,239 @@
/*-------------------------------------------------------------------------
 *
 * replication_origin_session_utils.c
 *   Functions for managing replication origin session.
 *
 * Copyright (c) Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */

#include "distributed/replication_origin_session_utils.h"
#include "distributed/remote_commands.h"
#include "distributed/metadata_cache.h"
#include "utils/builtins.h"
#include "miscadmin.h"

static bool IsRemoteReplicationOriginSessionSetup(MultiConnection *connection);

static void SetupMemoryContextResetReplicationOriginHandler(void);

static void SetupReplicationOriginSessionHelper(bool isContexResetSetupNeeded);

static inline bool IsLocalReplicationOriginSessionActive(void);

PG_FUNCTION_INFO_V1(citus_internal_start_replication_origin_tracking);
PG_FUNCTION_INFO_V1(citus_internal_stop_replication_origin_tracking);
PG_FUNCTION_INFO_V1(citus_internal_is_replication_origin_tracking_active);

/*
 * This variable is used to remember the replication origin id of the current session
 * before resetting it to DoNotReplicateId in SetupReplicationOriginLocalSession.
 */
static RepOriginId OriginalOriginId = InvalidRepOriginId;

/*
 * Setting that controls whether replication origin tracking is enabled
 */
bool EnableChangeDataCapture = false;


/* citus_internal_start_replication_origin_tracking starts a new replication origin session
 * in the local node. This function is used to avoid publishing the WAL records to the
 * replication slot by setting replication origin to DoNotReplicateId in WAL records.
 * It remembers the previous replication origin for the current session which will be
 * used to reset the replication origin to the previous value when the session ends.
 */
Datum
citus_internal_start_replication_origin_tracking(PG_FUNCTION_ARGS)
{
    if (!EnableChangeDataCapture)
    {
        PG_RETURN_VOID();
    }
    SetupReplicationOriginSessionHelper(false);
    PG_RETURN_VOID();
}


/* citus_internal_stop_replication_origin_tracking ends the current replication origin session
 * in the local node. This function is used to reset the replication origin to the
 * earlier value of replication origin.
 */
Datum
citus_internal_stop_replication_origin_tracking(PG_FUNCTION_ARGS)
{
    ResetReplicationOriginLocalSession();
    PG_RETURN_VOID();
}


/* citus_internal_is_replication_origin_tracking_active checks if the current replication origin
 * session is active in the local node.
 */
Datum
citus_internal_is_replication_origin_tracking_active(PG_FUNCTION_ARGS)
{
    bool result = IsLocalReplicationOriginSessionActive();
    PG_RETURN_BOOL(result);
}
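These three UDFs are the SQL-visible surface of this file; the same calls appear further down as the queries sent to remote sessions. A minimal usage sketch, assuming the Citus extension is installed and change data capture is enabled (the EnableChangeDataCapture setting above):

-- Minimal sketch: start tracking, check that it is active, then stop it again.
SELECT pg_catalog.citus_internal_start_replication_origin_tracking();
SELECT pg_catalog.citus_internal_is_replication_origin_tracking_active();  -- returns t while active
SELECT pg_catalog.citus_internal_stop_replication_origin_tracking();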
/* IsLocalReplicationOriginSessionActive checks if the current replication origin
 * session is active in the local node.
 */
inline bool
IsLocalReplicationOriginSessionActive(void)
{
    return (replorigin_session_origin == DoNotReplicateId);
}


/*
 * SetupMemoryContextResetReplicationOriginHandler registers a callback function
 * that resets the replication origin session in case of any error for the current
 * memory context.
 */
static void
SetupMemoryContextResetReplicationOriginHandler()
{
    MemoryContextCallback *replicationOriginResetCallback = palloc0(
        sizeof(MemoryContextCallback));
    replicationOriginResetCallback->func =
        ResetReplicationOriginLocalSessionCallbackHandler;
    replicationOriginResetCallback->arg = NULL;
    MemoryContextRegisterResetCallback(CurrentMemoryContext,
                                       replicationOriginResetCallback);
}


/*
 * SetupReplicationOriginSessionHelper sets up a new replication origin session in a
 * local session. It takes an argument isContexResetSetupNeeded to decide whether
 * to register a callback function that resets the replication origin session in case
 * of any error for the current memory context.
 */
static void
SetupReplicationOriginSessionHelper(bool isContexResetSetupNeeded)
{
    if (!EnableChangeDataCapture)
    {
        return;
    }
    OriginalOriginId = replorigin_session_origin;
    replorigin_session_origin = DoNotReplicateId;
    if (isContexResetSetupNeeded)
    {
        SetupMemoryContextResetReplicationOriginHandler();
    }
}


/*
 * SetupReplicationOriginLocalSession sets up a new replication origin session in a
 * local session.
 */
void
SetupReplicationOriginLocalSession()
{
    SetupReplicationOriginSessionHelper(true);
}


/*
 * ResetReplicationOriginLocalSession resets the replication origin session in a
 * local node.
 */
void
ResetReplicationOriginLocalSession(void)
{
    if (replorigin_session_origin != DoNotReplicateId)
    {
        return;
    }

    replorigin_session_origin = OriginalOriginId;
}


/*
 * ResetReplicationOriginLocalSessionCallbackHandler is a callback function that
 * resets the replication origin session in a local node. This is used to register
 * with MemoryContextRegisterResetCallback to reset the replication origin session
 * in case of any error for the given memory context.
 */
void
ResetReplicationOriginLocalSessionCallbackHandler(void *arg)
{
    ResetReplicationOriginLocalSession();
}


/*
 * SetupReplicationOriginRemoteSession sets up a new replication origin session in a
 * remote session. The identifier is used to create a unique replication origin name
 * for the session in the remote node.
 */
void
SetupReplicationOriginRemoteSession(MultiConnection *connection)
{
    if (!EnableChangeDataCapture)
    {
        return;
    }
    if (connection != NULL && !IsRemoteReplicationOriginSessionSetup(connection))
    {
        StringInfo replicationOriginSessionSetupQuery = makeStringInfo();
        appendStringInfo(replicationOriginSessionSetupQuery,
                         "select pg_catalog.citus_internal_start_replication_origin_tracking();");
        ExecuteCriticalRemoteCommand(connection,
                                     replicationOriginSessionSetupQuery->data);
        connection->isReplicationOriginSessionSetup = true;
    }
}


/*
 * ResetReplicationOriginRemoteSession resets the replication origin session in a
 * remote node.
 */
void
ResetReplicationOriginRemoteSession(MultiConnection *connection)
{
    if (connection != NULL && connection->isReplicationOriginSessionSetup)
    {
        StringInfo replicationOriginSessionResetQuery = makeStringInfo();
        appendStringInfo(replicationOriginSessionResetQuery,
                         "select pg_catalog.citus_internal_stop_replication_origin_tracking();");
        ExecuteCriticalRemoteCommand(connection,
                                     replicationOriginSessionResetQuery->data);
        connection->isReplicationOriginSessionSetup = false;
    }
}


/*
 * IsRemoteReplicationOriginSessionSetup checks if the replication origin is setup
 * already in the remote session by calling the UDF
 * citus_internal_is_replication_origin_tracking_active(). This is also remembered
 * in the connection object to avoid calling the UDF again next time.
 */
static bool
IsRemoteReplicationOriginSessionSetup(MultiConnection *connection)
{
    if (connection->isReplicationOriginSessionSetup)
    {
        return true;
    }

    StringInfo isReplicationOriginSessionSetupQuery = makeStringInfo();
    appendStringInfo(isReplicationOriginSessionSetupQuery,
                     "SELECT pg_catalog.citus_internal_is_replication_origin_tracking_active()");
    bool result =
        ExecuteRemoteCommandAndCheckResult(connection,
                                           isReplicationOriginSessionSetupQuery->data,
                                           "t");

    connection->isReplicationOriginSessionSetup = result;
    return result;
}
@ -503,45 +503,6 @@ SetLocktagForShardDistributionMetadata(int64 shardId, LOCKTAG *tag)
}


/*
 * LockPlacementCleanup takes an exclusive lock to ensure that only one process
 * can cleanup placements at the same time.
 */
void
LockPlacementCleanup(void)
{
    LOCKTAG tag;
    const bool sessionLock = false;
    const bool dontWait = false;

    /* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE.
     * This will change as we add support for parallel moves.
     */
    SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE);
    (void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
}


/*
 * TryLockPlacementCleanup takes an exclusive lock to ensure that only one
 * process can cleanup placements at the same time.
 */
bool
TryLockPlacementCleanup(void)
{
    LOCKTAG tag;
    const bool sessionLock = false;
    const bool dontWait = true;

    /* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE.
     * This will change as we add support for parallel moves.
     */
    SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE);
    bool lockAcquired = LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
    return lockAcquired;
}


/*
 * LockReferencedReferenceShardDistributionMetadata acquires shard distribution
 * metadata locks with the given lock mode on the reference tables which has a
@ -223,8 +223,7 @@ ShardIndex(ShardInterval *shardInterval)
     * currently it is not required.
     */
    if (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) &&
        !IsCitusTableTypeCacheEntry(
            cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
        HasDistributionKeyCacheEntry(cacheEntry))
    {
        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                        errmsg("finding index of a given shard is only supported for "
@ -233,7 +232,7 @@ ShardIndex(ShardInterval *shardInterval)
    }

    /* short-circuit for reference tables */
    if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
    if (!HasDistributionKeyCacheEntry(cacheEntry))
    {
        /*
         * Reference tables and citus local tables have only a single shard,
@ -333,7 +332,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry)
            shardIndex = CalculateUniformHashRangeIndex(hashedValue, shardCount);
        }
    }
    else if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
    else if (!HasDistributionKeyCacheEntry(cacheEntry))
    {
        /* non-distributed tables have a single shard, all values mapped to that shard */
        Assert(shardCount == 1);
@ -35,8 +35,22 @@
#include "distributed/worker_create_or_replace.h"
#include "distributed/worker_protocol.h"


/*
 * OnCollisionAction describes what to do when the created object
 * and existing object do not match.
 */
typedef enum OnCollisionAction
{
    ON_COLLISION_RENAME,
    ON_COLLISION_DROP
} OnCollisionAction;


static List * CreateStmtListByObjectAddress(const ObjectAddress *address);
static bool CompareStringList(List *list1, List *list2);
static OnCollisionAction GetOnCollisionAction(const ObjectAddress *address);


PG_FUNCTION_INFO_V1(worker_create_or_replace_object);
PG_FUNCTION_INFO_V1(worker_create_or_replace_object_array);
@ -192,7 +206,8 @@ WorkerCreateOrReplaceObject(List *sqlStatements)
    /*
     * Object with name from statement is already found locally, check if states are
     * identical. If objects differ we will rename the old object (non-destructively)
     * as to make room to create the new object according to the spec sent.
     * or drop it (if safe) as to make room to create the new object according to the
     * spec sent.
     */

    /*
@ -213,11 +228,22 @@ WorkerCreateOrReplaceObject(List *sqlStatements)
            return false;
        }

        char *newName = GenerateBackupNameForCollision(address);
        Node *utilityStmt = NULL;

        RenameStmt *renameStmt = CreateRenameStatement(address, newName);
        const char *sqlRenameStmt = DeparseTreeNode((Node *) renameStmt);
        ProcessUtilityParseTree((Node *) renameStmt, sqlRenameStmt,
        if (GetOnCollisionAction(address) == ON_COLLISION_DROP)
        {
            /* drop the existing object */
            utilityStmt = (Node *) CreateDropStmt(address);
        }
        else
        {
            /* rename the existing object */
            char *newName = GenerateBackupNameForCollision(address);
            utilityStmt = (Node *) CreateRenameStatement(address, newName);
        }

        const char *commandString = DeparseTreeNode(utilityStmt);
        ProcessUtilityParseTree(utilityStmt, commandString,
                                PROCESS_UTILITY_QUERY,
                                NULL, None_Receiver, NULL);
    }
@ -286,6 +312,11 @@ CreateStmtListByObjectAddress(const ObjectAddress *address)
            return list_make1(GetFunctionDDLCommand(address->objectId, false));
        }

        case OCLASS_PUBLICATION:
        {
            return list_make1(CreatePublicationDDLCommand(address->objectId));
        }

        case OCLASS_TSCONFIG:
        {
            List *stmts = GetCreateTextSearchConfigStatements(address);
@ -312,6 +343,37 @@ CreateStmtListByObjectAddress(const ObjectAddress *address)
}


/*
 * GetOnCollisionAction decides what to do when the object already exists.
 */
static OnCollisionAction
GetOnCollisionAction(const ObjectAddress *address)
{
    switch (getObjectClass(address))
    {
        case OCLASS_PUBLICATION:
        {
            /*
             * We prefer to drop publications because they can be
             * harmful (cause update/delete failures) and are relatively
             * safe to drop.
             */
            return ON_COLLISION_DROP;
        }

        case OCLASS_COLLATION:
        case OCLASS_PROC:
        case OCLASS_TSCONFIG:
        case OCLASS_TSDICT:
        case OCLASS_TYPE:
        default:
        {
            return ON_COLLISION_RENAME;
        }
    }
}
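Putting GetOnCollisionAction together with the drop/rename branch above: when a worker already holds a publication whose definition differs from the one being created, the existing publication is dropped rather than renamed, then re-created. A hedged usage sketch, assuming worker_create_or_replace_object keeps its single text-argument form and using a hypothetical publication name:

-- If a differing my_pub already exists on the worker, it is dropped
-- (ON_COLLISION_DROP) and re-created from the statement below.
SELECT worker_create_or_replace_object('CREATE PUBLICATION my_pub FOR ALL TABLES');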
/*
 * GenerateBackupNameForCollision calculate a backup name for a given object by its
 * address. This name should be used when renaming an existing object before creating the
@ -362,6 +424,64 @@ GenerateBackupNameForCollision(const ObjectAddress *address)
}


/*
 * CreateDropPublicationStmt creates a DROP PUBLICATION statement for the
 * publication at the given address.
 */
static DropStmt *
CreateDropPublicationStmt(const ObjectAddress *address)
{
    Assert(address->classId == PublicationRelationId);

    DropStmt *dropStmt = makeNode(DropStmt);
    dropStmt->removeType = OBJECT_PUBLICATION;
    dropStmt->behavior = DROP_RESTRICT;

    HeapTuple publicationTuple =
        SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(address->objectId));

    if (!HeapTupleIsValid(publicationTuple))
    {
        ereport(ERROR, (errmsg("cannot find publication with oid: %d",
                               address->objectId)));
    }

    Form_pg_publication publicationForm =
        (Form_pg_publication) GETSTRUCT(publicationTuple);

    char *publicationName = NameStr(publicationForm->pubname);
    dropStmt->objects = list_make1(makeString(publicationName));

    ReleaseSysCache(publicationTuple);

    return dropStmt;
}


/*
 * CreateDropStmt returns a DROP statement for the given object.
 */
DropStmt *
CreateDropStmt(const ObjectAddress *address)
{
    switch (getObjectClass(address))
    {
        case OCLASS_PUBLICATION:
        {
            return CreateDropPublicationStmt(address);
        }

        default:
        {
            break;
        }
    }

    ereport(ERROR, (errmsg("unsupported object to construct a drop statement"),
                    errdetail("unable to generate a parsetree for the drop")));
}


/*
 * CreateRenameTypeStmt creates a rename statement for a type based on its ObjectAddress.
 * The rename statement will rename the existing object on its address to the value
@ -70,6 +70,7 @@ static void AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequence
PG_FUNCTION_INFO_V1(worker_apply_shard_ddl_command);
PG_FUNCTION_INFO_V1(worker_apply_inter_shard_ddl_command);
PG_FUNCTION_INFO_V1(worker_apply_sequence_command);
PG_FUNCTION_INFO_V1(worker_adjust_identity_column_seq_ranges);
PG_FUNCTION_INFO_V1(worker_append_table_to_shard);
PG_FUNCTION_INFO_V1(worker_nextval);
@ -133,6 +134,60 @@ worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS)
}


/*
 * worker_adjust_identity_column_seq_ranges takes a table oid, runs an ALTER SEQUENCE statement
 * for each identity column to adjust the minvalue and maxvalue of the sequence owned by
 * identity column such that the sequence creates globally unique values.
 * We use table oid instead of sequence name to avoid any potential conflicts between
 * sequences of different tables. This way, we can safely iterate through identity
 * columns on a specific table without any issues. While this may introduce a small
 * amount of business logic to workers, it's a much safer approach overall.
 */
Datum
worker_adjust_identity_column_seq_ranges(PG_FUNCTION_ARGS)
{
    CheckCitusVersion(ERROR);

    Oid tableRelationId = PG_GETARG_OID(0);

    EnsureTableOwner(tableRelationId);

    Relation tableRelation = relation_open(tableRelationId, AccessShareLock);
    TupleDesc tableTupleDesc = RelationGetDescr(tableRelation);

    bool missingSequenceOk = false;

    for (int attributeIndex = 0; attributeIndex < tableTupleDesc->natts;
         attributeIndex++)
    {
        Form_pg_attribute attributeForm = TupleDescAttr(tableTupleDesc,
                                                        attributeIndex);

        /* skip dropped columns */
        if (attributeForm->attisdropped)
        {
            continue;
        }

        if (attributeForm->attidentity)
        {
            Oid sequenceOid = getIdentitySequence(tableRelationId,
                                                  attributeForm->attnum,
                                                  missingSequenceOk);

            Oid sequenceSchemaOid = get_rel_namespace(sequenceOid);
            char *sequenceSchemaName = get_namespace_name(sequenceSchemaOid);
            char *sequenceName = get_rel_name(sequenceOid);
            Oid sequenceTypeId = pg_get_sequencedef(sequenceOid)->seqtypid;

            AlterSequenceMinMax(sequenceOid, sequenceSchemaName, sequenceName,
                                sequenceTypeId);
        }
    }

    relation_close(tableRelation, NoLock);

    PG_RETURN_VOID();
}
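Since the function is exposed through PG_FUNCTION_INFO_V1, it is invoked from SQL on the worker. A hedged sketch, assuming the SQL-level argument is the table's regclass/oid (the C code reads an oid) and using a hypothetical shard table name:

-- Adjust identity-column sequence ranges for one (hypothetical) shard table.
SELECT worker_adjust_identity_column_seq_ranges('public.items_102008'::regclass);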
/*
 * worker_apply_sequence_command takes a CREATE SEQUENCE command string, runs the
 * CREATE SEQUENCE command then creates and runs an ALTER SEQUENCE statement
@ -351,18 +351,17 @@ ShouldHideShardsInternal(void)
            return false;
        }
    }
    else if (MyBackendType != B_BACKEND)
    else if (MyBackendType != B_BACKEND && MyBackendType != B_WAL_SENDER)
    {
        /*
         * We are aiming only to hide shards from client
         * backends or certain background workers(see above),
         * not backends like walsender or checkpointer.
         */
        return false;
    }

    if (IsCitusInternalBackend() || IsRebalancerInternalBackend() ||
        IsCitusRunCommandBackend())
        IsCitusRunCommandBackend() || IsCitusShardTransferBackend())
    {
        /* we never hide shards from Citus */
        return false;