mirror of https://github.com/citusdata/citus.git
commit 201d976a3b: Merge branch 'main' into sqlancer-test-gha
@@ -6,7 +6,7 @@ orbs:
 parameters:
   image_suffix:
     type: string
-    default: '-vc4b1573'
+    default: '-v087ecd7'
   pg13_version:
     type: string
     default: '13.10'
@@ -201,6 +201,9 @@ jobs:
       - run:
           name: 'Check if all GUCs are sorted alphabetically'
           command: ci/check_gucs_are_alphabetically_sorted.sh
+      - run:
+          name: 'Check for missing downgrade scripts'
+          command: ci/check_migration_files.sh

   check-sql-snapshots:
     docker:
@@ -266,6 +269,41 @@ jobs:
       - coverage:
           flags: 'test_<< parameters.old_pg_major >>_<< parameters.new_pg_major >>,upgrade'

+  test-pytest:
+    description: Runs pytest based tests
+    parameters:
+      pg_major:
+        description: 'postgres major version'
+        type: integer
+      image:
+        description: 'docker image to use for the tests'
+        type: string
+        default: citus/failtester
+      image_tag:
+        description: 'docker image tag to use'
+        type: string
+    docker:
+      - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
+    working_directory: /home/circleci/project
+    steps:
+      - checkout
+      - attach_workspace:
+          at: .
+      - install_extension:
+          pg_major: << parameters.pg_major >>
+      - configure
+      - enable_core
+      - run:
+          name: 'Run pytest'
+          command: |
+            gosu circleci \
+              make -C src/test/regress check-pytest
+          no_output_timeout: 2m
+      - stack_trace
+      - coverage:
+          flags: 'test_<< parameters.pg_major >>,pytest'
+
+
   test-arbitrary-configs:
     description: Runs tests on arbitrary configs
     parallelism: 6
@@ -452,6 +490,10 @@ jobs:
           pg_major: << parameters.pg_major >>
       - configure
       - enable_core
+      - run:
+          name: 'Install DBI.pm'
+          command: |
+            apt-get update && apt-get install libdbi-perl && apt-get install libdbd-pg-perl
       - run:
           name: 'Run Test'
           command: |
@@ -551,7 +593,7 @@ jobs:
             testForDebugging="<< parameters.test >>"

             if [ -z "$testForDebugging" ]; then
-              detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*.sql\|src/test/regress/spec/.*.spec' || true))
+              detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*\.sql\|src/test/regress/spec/.*\.spec\|src/test/regress/citus_tests/test/test_.*\.py' || true))
               tests=${detected_changes}
             else
              tests=$testForDebugging;
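The tightened pattern escapes the literal dots and also picks up the new pytest files. A quick sanity check of the new grep expression (the file names below are made up purely for illustration):

    printf '%s\n' \
        src/test/regress/sql/some_test.sql \
        src/test/regress/spec/some_isolation.spec \
        src/test/regress/citus_tests/test/test_example.py \
        src/backend/distributed/shared_library_init.c |
        grep 'src/test/regress/sql/.*\.sql\|src/test/regress/spec/.*\.spec\|src/test/regress/citus_tests/test/test_.*\.py'
    # expected output: the first three paths match; the .c file is filtered out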
@@ -854,38 +896,30 @@ workflows:
           image: citus/failtester
           make: check-failure

-      - tap-test-citus: &tap-test-citus-13
-          name: 'test-13_tap-recovery'
-          suite: recovery
+      - test-pytest:
+          name: 'test-13_pytest'
           pg_major: 13
           image_tag: '<< pipeline.parameters.pg13_version >>'
           requires: [build-13]
-      - tap-test-citus:
-          <<: *tap-test-citus-13
-          name: 'test-13_tap-columnar-freezing'
-          suite: columnar_freezing

-      - tap-test-citus: &tap-test-citus-14
-          name: 'test-14_tap-recovery'
-          suite: recovery
+      - test-pytest:
+          name: 'test-14_pytest'
           pg_major: 14
           image_tag: '<< pipeline.parameters.pg14_version >>'
           requires: [build-14]
-      - tap-test-citus:
-          <<: *tap-test-citus-14
-          name: 'test-14_tap-columnar-freezing'
-          suite: columnar_freezing

-      - tap-test-citus: &tap-test-citus-15
-          name: 'test-15_tap-recovery'
-          suite: recovery
+      - test-pytest:
+          name: 'test-15_pytest'
           pg_major: 15
           image_tag: '<< pipeline.parameters.pg15_version >>'
           requires: [build-15]

       - tap-test-citus:
-          <<: *tap-test-citus-15
-          name: 'test-15_tap-columnar-freezing'
-          suite: columnar_freezing
+          name: 'test-15_tap-cdc'
+          suite: cdc
+          pg_major: 15
+          image_tag: '<< pipeline.parameters.pg15_version >>'
+          requires: [build-15]

       - test-arbitrary-configs:
           name: 'test-13_check-arbitrary-configs'
@@ -936,8 +970,6 @@ workflows:
             - test-13_check-follower-cluster
             - test-13_check-columnar
             - test-13_check-columnar-isolation
-            - test-13_tap-recovery
-            - test-13_tap-columnar-freezing
             - test-13_check-failure
             - test-13_check-enterprise
             - test-13_check-enterprise-isolation
@@ -956,8 +988,6 @@ workflows:
             - test-14_check-follower-cluster
             - test-14_check-columnar
             - test-14_check-columnar-isolation
-            - test-14_tap-recovery
-            - test-14_tap-columnar-freezing
             - test-14_check-failure
             - test-14_check-enterprise
             - test-14_check-enterprise-isolation
@@ -976,8 +1006,6 @@ workflows:
             - test-15_check-follower-cluster
             - test-15_check-columnar
             - test-15_check-columnar-isolation
-            - test-15_tap-recovery
-            - test-15_tap-columnar-freezing
             - test-15_check-failure
             - test-15_check-enterprise
             - test-15_check-enterprise-isolation
@@ -17,7 +17,7 @@ trim_trailing_whitespace = true
 insert_final_newline = unset
 trim_trailing_whitespace = unset

-[*.{sql,sh,py}]
+[*.{sql,sh,py,toml}]
 indent_style = space
 indent_size = 4
 tab_width = 4
.flake8
@@ -1,7 +1,6 @@
 [flake8]
 # E203 is ignored for black
-# E402 is ignored because of te way we do relative imports
-extend-ignore = E203, E402
+extend-ignore = E203
 # black will truncate to 88 characters usually, but long string literals it
 # might keep. That's fine in most cases unless it gets really excessive.
 max-line-length = 150
@@ -157,7 +157,6 @@ jobs:

           apt-get update -y
           ## Install required packages to execute packaging tools for deb based distros
-          apt install python3-dev python3-pip -y
-          sudo apt-get purge -y python3-yaml
-          python3 -m pip install --upgrade pip setuptools==57.5.0
+          apt-get install python3-dev python3-pip -y
+          apt-get purge -y python3-yaml
           ./.github/packaging/validate_build_output.sh "deb"
@@ -283,6 +283,14 @@ actually run in CI. This is most commonly forgotten for newly added CI tests
 that the developer only ran locally. It also checks that all CI scripts have a
 section in this `README.md` file and that they include `ci/ci_helpers.sh`.

+## `check_migration_files.sh`
+
+A branch that touches a set of upgrade scripts is expected to touch the
+corresponding downgrade scripts as well. If this script fails, read the output
+and make sure you update the downgrade scripts in the printed list. If you
+really don't need a downgrade to run any SQL, you can write a comment in the
+file explaining why a downgrade step is not necessary.
+
 ## `disallow_c_comments_in_migrations.sh`

 We do not use C-style comments in migration files as the stripped
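A sketch of running this check locally, assuming origin/main has been fetched (the file names in the sample output are illustrative):

    git fetch origin main
    ci/check_migration_files.sh
    # on failure, prints lines such as:
    # src/backend/distributed/sql/citus--11.1-1--11.2-1.sql is updated, but src/backend/distributed/sql/downgrades/citus--11.2-1--11.1-1.sql is not updated in branch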
@@ -0,0 +1,33 @@
+#! /bin/bash
+
+set -euo pipefail
+# shellcheck disable=SC1091
+source ci/ci_helpers.sh
+
+# This file checks for the existence of downgrade scripts for every upgrade script that is changed in the branch.
+
+# create list of migration files for upgrades
+upgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/citus--.*sql" || exit 0 ; })
+downgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/downgrades/citus--.*sql" || exit 0 ; })
+ret_value=0
+
+for file in $upgrade_files
+do
+    # There should always be 2 matches, and no need to avoid splitting here
+    # shellcheck disable=SC2207
+    versions=($(grep --only-matching --extended-regexp "[0-9]+\.[0-9]+[-.][0-9]+" <<< "$file"))
+
+    from_version=${versions[0]};
+    to_version=${versions[1]};
+
+    downgrade_migration_file="src/backend/distributed/sql/downgrades/citus--$to_version--$from_version.sql"
+
+    # check for the existence of migration scripts
+    if [[ $(grep --line-regexp --count "$downgrade_migration_file" <<< "$downgrade_files") == 0 ]]
+    then
+        echo "$file is updated, but $downgrade_migration_file is not updated in branch"
+        ret_value=1
+    fi
+done
+
+exit $ret_value;
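As a sketch of the version extraction in the loop above (the upgrade file name here is hypothetical):

    file="src/backend/distributed/sql/citus--11.1-1--11.2-1.sql"
    versions=($(grep --only-matching --extended-regexp "[0-9]+\.[0-9]+[-.][0-9]+" <<< "$file"))
    echo "${versions[0]}"   # 11.1-1
    echo "${versions[1]}"   # 11.2-1
    # expected downgrade file:
    # src/backend/distributed/sql/downgrades/citus--11.2-1--11.1-1.sql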
@@ -3,3 +3,35 @@ profile = 'black'

 [tool.black]
 include = '(src/test/regress/bin/diff-filter|\.pyi?|\.ipynb)$'
+
+[tool.pytest.ini_options]
+addopts = [
+    "--import-mode=importlib",
+    "--showlocals",
+    "--tb=short",
+]
+pythonpath = 'src/test/regress/citus_tests'
+asyncio_mode = 'auto'
+
+# Make test discovery quicker from the root dir of the repo
+testpaths = ['src/test/regress/citus_tests/test']
+
+# Make test discovery quicker from other directories than root directory
+norecursedirs = [
+    '*.egg',
+    '.*',
+    'build',
+    'venv',
+    'ci',
+    'vendor',
+    'backend',
+    'bin',
+    'include',
+    'tmp_*',
+    'results',
+    'expected',
+    'sql',
+    'spec',
+    'data',
+    '__pycache__',
+]
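With this configuration, a local run should work from the repository root; a hedged sketch (the -k filter is illustrative, and pytest plus pytest-asyncio are assumed to be installed):

    python3 -m pip install pytest pytest-asyncio
    pytest --collect-only                  # discovery limited by testpaths above
    pytest -k 'example'                    # run a subset by keyword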
@@ -0,0 +1,3 @@
+# The directory used to store columnar sql files after pre-processing them
+# with 'cpp' at build time, see src/backend/columnar/Makefile.
+/build/
@@ -10,14 +10,51 @@ OBJS += \
 MODULE_big = citus_columnar
 EXTENSION = citus_columnar

-columnar_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql))
-columnar_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql))
-DATA = $(columnar_sql_files) \
-	$(columnar_downgrade_sql_files)
+template_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql))
+template_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/sql/downgrades/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql))
+generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_files))
+generated_downgrade_sql_files += $(patsubst %,$(citus_abs_srcdir)/build/sql/%,$(template_downgrade_sql_files))
+
+DATA_built = $(generated_sql_files)

 PG_CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include

 include $(citus_top_builddir)/Makefile.global

-.PHONY: install-all
+SQL_DEPDIR=.deps/sql
+SQL_BUILDDIR=build/sql
+
+$(generated_sql_files): $(citus_abs_srcdir)/build/%: %
+	@mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR)
+	@# -MF is used to store dependency files(.Po) in another directory for separation
+	@# -MT is used to change the target of the rule emitted by dependency generation.
+	@# -P is used to inhibit generation of linemarkers in the output from the preprocessor.
+	@# -undef is used to not predefine any system-specific or GCC-specific macros.
+	@# `man cpp` for further information
+	cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
+
+$(generated_downgrade_sql_files): $(citus_abs_srcdir)/build/sql/%: sql/downgrades/%
+	@mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR)
+	@# -MF is used to store dependency files(.Po) in another directory for separation
+	@# -MT is used to change the target of the rule emitted by dependency generation.
+	@# -P is used to inhibit generation of linemarkers in the output from the preprocessor.
+	@# -undef is used to not predefine any system-specific or GCC-specific macros.
+	@# `man cpp` for further information
+	cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
+
+.PHONY: install install-downgrades install-all
+
+cleanup-before-install:
+	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar.control
+	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/columnar--*
+	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar--*
+
+install: cleanup-before-install
+
+# install and install-downgrades should be run sequentially
 install-all: install
+	$(MAKE) install-downgrades
+
+install-downgrades: $(generated_downgrade_sql_files)
+	$(INSTALL_DATA) $(generated_downgrade_sql_files) '$(DESTDIR)$(datadir)/$(datamoduledir)/'
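The cpp rule above treats each .sql file as a template whose #include directives are inlined into the build/ copy. A minimal sketch of the effect, with made-up file contents:

    mkdir -p sql/udfs/demo
    echo 'SELECT 1;' > sql/udfs/demo/1.0.sql
    printf -- '-- demo--1.0\n#include "udfs/demo/1.0.sql"\n' > sql/demo--1.0.sql
    cpp -undef -w -P sql/demo--1.0.sql
    # prints the comment line followed by the inlined SELECT 1;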
@@ -1 +1,19 @@
 -- citus_columnar--11.1-1--11.2-1

+#include "udfs/columnar_ensure_am_depends_catalog/11.2-1.sql"
+
+DELETE FROM pg_depend
+WHERE classid = 'pg_am'::regclass::oid
+    AND objid IN (select oid from pg_am where amname = 'columnar')
+    AND objsubid = 0
+    AND refclassid = 'pg_class'::regclass::oid
+    AND refobjid IN (
+        'columnar_internal.stripe_first_row_number_idx'::regclass::oid,
+        'columnar_internal.chunk_group_pkey'::regclass::oid,
+        'columnar_internal.chunk_pkey'::regclass::oid,
+        'columnar_internal.options_pkey'::regclass::oid,
+        'columnar_internal.stripe_first_row_number_idx'::regclass::oid,
+        'columnar_internal.stripe_pkey'::regclass::oid
+    )
+    AND refobjsubid = 0
+    AND deptype = 'n';
@@ -1 +1,4 @@
 -- citus_columnar--11.2-1--11.1-1

+-- Note that we intentionally do not re-insert the pg_depend records that we
+-- deleted via citus_columnar--11.1-1--11.2-1.sql.
@@ -0,0 +1,43 @@
+CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
+  RETURNS void
+  LANGUAGE plpgsql
+  SET search_path = pg_catalog
+AS $func$
+BEGIN
+  INSERT INTO pg_depend
+  WITH columnar_schema_members(relid) AS (
+    SELECT pg_class.oid AS relid FROM pg_class
+    WHERE relnamespace =
+      COALESCE(
+        (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar_internal'),
+        (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar')
+      )
+      AND relname IN ('chunk',
+                      'chunk_group',
+                      'options',
+                      'storageid_seq',
+                      'stripe')
+  )
+  SELECT -- Define a dependency edge from "columnar table access method" ..
+         'pg_am'::regclass::oid as classid,
+         (select oid from pg_am where amname = 'columnar') as objid,
+         0 as objsubid,
+         -- ... to some objects registered as regclass and that lives in
+         -- "columnar" schema. That contains catalog tables and the sequences
+         -- created in "columnar" schema.
+         --
+         -- Given the possibility of user might have created their own objects
+         -- in columnar schema, we explicitly specify list of objects that we
+         -- are interested in.
+         'pg_class'::regclass::oid as refclassid,
+         columnar_schema_members.relid as refobjid,
+         0 as refobjsubid,
+         'n' as deptype
+  FROM columnar_schema_members
+  -- Avoid inserting duplicate entries into pg_depend.
+  EXCEPT TABLE pg_depend;
+END;
+$func$;
+COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
+  IS 'internal function responsible for creating dependencies from columnar '
+     'table access method to the rel objects in columnar schema';
@@ -1,4 +1,4 @@
-CREATE OR REPLACE FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
+CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
   RETURNS void
   LANGUAGE plpgsql
   SET search_path = pg_catalog
@@ -14,22 +14,17 @@ BEGIN
       )
       AND relname IN ('chunk',
                       'chunk_group',
-                      'chunk_group_pkey',
-                      'chunk_pkey',
                       'options',
-                      'options_pkey',
                       'storageid_seq',
-                      'stripe',
-                      'stripe_first_row_number_idx',
-                      'stripe_pkey')
+                      'stripe')
   )
   SELECT -- Define a dependency edge from "columnar table access method" ..
          'pg_am'::regclass::oid as classid,
          (select oid from pg_am where amname = 'columnar') as objid,
          0 as objsubid,
-         -- ... to each object that is registered to pg_class and that lives
-         -- in "columnar" schema. That contains catalog tables, indexes
-         -- created on them and the sequences created in "columnar" schema.
+         -- ... to some objects registered as regclass and that lives in
+         -- "columnar" schema. That contains catalog tables and the sequences
+         -- created in "columnar" schema.
          --
          -- Given the possibility of user might have created their own objects
          -- in columnar schema, we explicitly specify list of objects that we
@@ -43,6 +38,6 @@ BEGIN
   EXCEPT TABLE pg_depend;
 END;
 $func$;
-COMMENT ON FUNCTION citus_internal.columnar_ensure_am_depends_catalog()
+COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
   IS 'internal function responsible for creating dependencies from columnar '
      'table access method to the rel objects in columnar schema';
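A hedged way to inspect the edges this function records, once the extension is installed (the query is illustrative, built from the catalog columns used above):

    psql -c "SELECT refobjid::regclass
             FROM pg_depend
             WHERE classid = 'pg_am'::regclass
               AND objid = (SELECT oid FROM pg_am WHERE amname = 'columnar')
               AND deptype = 'n';"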
@@ -32,7 +32,13 @@ OBJS += \
 	$(patsubst $(citus_abs_srcdir)/%.c,%.o,$(foreach dir,$(SUBDIRS), $(sort $(wildcard $(citus_abs_srcdir)/$(dir)/*.c))))

 # be explicit about the default target
-all:
+.PHONY: cdc
+
+all: cdc
+
+cdc:
+	echo "running cdc make"
+	$(MAKE) DECODER=pgoutput -C cdc all

 NO_PGXS = 1
@@ -81,11 +87,19 @@ endif

 .PHONY: clean-full install install-downgrades install-all

+clean: clean-cdc
+
+clean-cdc:
+	$(MAKE) DECODER=pgoutput -C cdc clean
+
 cleanup-before-install:
 	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus.control
 	rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus--*

-install: cleanup-before-install
+install: cleanup-before-install install-cdc
+
+install-cdc:
+	$(MAKE) DECODER=pgoutput -C cdc install

 # install and install-downgrades should be run sequentially
 install-all: install
@@ -96,4 +110,5 @@ install-downgrades: $(generated_downgrade_sql_files)

 clean-full:
 	$(MAKE) clean
+	$(MAKE) -C cdc clean-full
 	rm -rf $(safestringlib_builddir)
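Taken together, these hooks mean a plain top-level build now also builds and installs the decoder; an assumed local workflow:

    ./configure
    make
    make install-all   # install (which now pulls in install-cdc) followed by install-downgrades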
@@ -0,0 +1,26 @@
+ifndef DECODER
+	DECODER = pgoutput
+endif
+
+MODULE_big = citus_$(DECODER)
+citus_subdir = src/backend/distributed/cdc
+citus_top_builddir = ../../../..
+citus_decoders_dir = $(DESTDIR)$(pkglibdir)/citus_decoders
+
+OBJS += cdc_decoder.o cdc_decoder_utils.o
+
+include $(citus_top_builddir)/Makefile.global
+
+override CFLAGS += -DDECODER=\"$(DECODER)\" -I$(citus_abs_top_srcdir)/include
+override CPPFLAGS += -DDECODER=\"$(DECODER)\" -I$(citus_abs_top_srcdir)/include
+
+install: install-cdc
+
+clean: clean-cdc
+
+install-cdc:
+	mkdir -p '$(citus_decoders_dir)'
+	$(INSTALL_SHLIB) citus_$(DECODER).so '$(citus_decoders_dir)/$(DECODER).so'
+
+clean-cdc:
+	rm -f '$(DESTDIR)$(datadir)/$(datamoduledir)/citus_decoders/$(DECODER).so'
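Because the library is installed as citus_decoders/pgoutput.so, it is picked up through dynamic_library_path rather than by an explicit plugin name; a sketch of the intended setup (paths and slot name are illustrative):

    psql -c "ALTER SYSTEM SET dynamic_library_path = '\$libdir/citus_decoders:\$libdir';"
    psql -c "SELECT pg_reload_conf();"
    psql -c "SELECT pg_create_logical_replication_slot('cdc_slot', 'pgoutput');"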
@@ -0,0 +1,500 @@
+/*-------------------------------------------------------------------------
+ *
+ * cdc_decoder.c
+ *		CDC Decoder plugin for Citus
+ *
+ * Copyright (c) Citus Data, Inc.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "cdc_decoder_utils.h"
+#include "postgres.h"
+#include "fmgr.h"
+
+#include "access/genam.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_publication.h"
+#include "commands/extension.h"
+#include "common/hashfn.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/typcache.h"
+
+PG_MODULE_MAGIC;
+
+extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
+static LogicalDecodeChangeCB ouputPluginChangeCB;
+
+static void InitShardToDistributedTableMap(void);
+
+static void PublishDistributedTableChanges(LogicalDecodingContext *ctx,
+										   ReorderBufferTXN *txn,
+										   Relation relation,
+										   ReorderBufferChange *change);
+
+static bool replication_origin_filter_cb(LogicalDecodingContext *ctx,
+										 RepOriginId origin_id);
+
+static void TranslateChangesIfSchemaChanged(Relation relation, Relation targetRelation,
+											ReorderBufferChange *change);
+
+static void TranslateAndPublishRelationForCDC(LogicalDecodingContext *ctx,
+											  ReorderBufferTXN *txn,
+											  Relation relation,
+											  ReorderBufferChange *change, Oid shardId,
+											  Oid targetRelationid);
+
+typedef struct
+{
+	uint64 shardId;
+	Oid distributedTableId;
+	bool isReferenceTable;
+	bool isNull;
+} ShardIdHashEntry;
+
+static HTAB *shardToDistributedTableMap = NULL;
+
+static void cdc_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+						  Relation relation, ReorderBufferChange *change);
+
+
+/* build time macro for base decoder plugin name for CDC and Shard Split. */
+#ifndef DECODER
+#define DECODER "pgoutput"
+#endif
+
+#define DECODER_INIT_FUNCTION_NAME "_PG_output_plugin_init"
+
+#define CITUS_SHARD_TRANSFER_SLOT_PREFIX "citus_shard_"
+#define CITUS_SHARD_TRANSFER_SLOT_PREFIX_SIZE (sizeof(CITUS_SHARD_TRANSFER_SLOT_PREFIX) - \
+											   1)
+
+/*
+ * Postgres uses 'pgoutput' as the default plugin for logical replication.
+ * We want to reuse Postgres pgoutput's functionality as much as possible.
+ * Hence we load all the functions of this plugin and override as required.
+ */
+void
+_PG_output_plugin_init(OutputPluginCallbacks *cb)
+{
+	elog(LOG, "Initializing CDC decoder");
+
+	/*
+	 * We build custom .so files whose name matches common decoders (pgoutput, wal2json)
+	 * and place them in $libdir/citus_decoders/ such that administrators can configure
+	 * dynamic_library_path to include this directory, and users can then use the
+	 * regular decoder names when creating replication slots.
+	 *
+	 * To load the original decoder, we need to remove citus_decoders/ from the
+	 * dynamic_library_path.
+	 */
+	char *originalDLP = Dynamic_library_path;
+	Dynamic_library_path = RemoveCitusDecodersFromPaths(Dynamic_library_path);
+
+	LogicalOutputPluginInit plugin_init =
+		(LogicalOutputPluginInit) (void *)
+		load_external_function(DECODER,
+							   DECODER_INIT_FUNCTION_NAME,
+							   false, NULL);
+
+	if (plugin_init == NULL)
+	{
+		elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
+	}
+
+	/* in case this session is used for different replication slots */
+	Dynamic_library_path = originalDLP;
+
+	/* ask the output plugin to fill the callback struct */
+	plugin_init(cb);
+
+	/* Initialize the Shard Id to Distributed Table id mapping hash table. */
+	InitShardToDistributedTableMap();
+
+	/* actual pgoutput callback function will be called */
+	ouputPluginChangeCB = cb->change_cb;
+	cb->change_cb = cdc_change_cb;
+	cb->filter_by_origin_cb = replication_origin_filter_cb;
+}
+
+
+/*
+ * Check if the replication slot is for shard transfer by checking for the prefix.
+ */
+inline static
+bool
+IsShardTransferSlot(char *replicationSlotName)
+{
+	return strncmp(replicationSlotName, CITUS_SHARD_TRANSFER_SLOT_PREFIX,
+				   CITUS_SHARD_TRANSFER_SLOT_PREFIX_SIZE) == 0;
+}
+
+
+/*
+ * cdc_change_cb emits the incoming tuple change
+ * to the appropriate destination shard.
+ */
+static void
+cdc_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+			  Relation relation, ReorderBufferChange *change)
+{
+	/*
+	 * If Citus has not been loaded yet, pass the changes
+	 * through to the underlying decoder plugin.
+	 */
+	if (!CdcCitusHasBeenLoaded())
+	{
+		ouputPluginChangeCB(ctx, txn, relation, change);
+		return;
+	}
+
+	/* check if the relation is publishable */
+	if (!is_publishable_relation(relation))
+	{
+		return;
+	}
+
+	char *replicationSlotName = ctx->slot->data.name.data;
+	if (replicationSlotName == NULL)
+	{
+		elog(ERROR, "Replication slot name is NULL!");
+		return;
+	}
+
+	/* If the slot is for internal shard operations, call the base plugin's callback. */
+	if (IsShardTransferSlot(replicationSlotName))
+	{
+		ouputPluginChangeCB(ctx, txn, relation, change);
+		return;
+	}
+
+	/* Translate the changes from shard to distributed table and publish. */
+	PublishDistributedTableChanges(ctx, txn, relation, change);
+}
+
+
+/*
+ * InitShardToDistributedTableMap initializes the hash table that is used to
+ * translate the changes in the shard table to the changes in the distributed table.
+ */
+static void
+InitShardToDistributedTableMap()
+{
+	HASHCTL info;
+	memset(&info, 0, sizeof(info));
+	info.keysize = sizeof(uint64);
+	info.entrysize = sizeof(ShardIdHashEntry);
+	info.hash = tag_hash;
+	info.hcxt = CurrentMemoryContext;
+
+	int hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION);
+	shardToDistributedTableMap = hash_create("CDC Decoder translation hash table", 1024,
+											 &info, hashFlags);
+}
+
+
+/*
+ * AddShardIdToHashTable adds the shardId to the hash table.
+ */
+static Oid
+AddShardIdToHashTable(uint64 shardId, ShardIdHashEntry *entry)
+{
+	entry->shardId = shardId;
+	entry->distributedTableId = CdcLookupShardRelationFromCatalog(shardId, true);
+	entry->isReferenceTable =
+		CdcPartitionMethodViaCatalog(entry->distributedTableId) == 'n';
+	return entry->distributedTableId;
+}
+
+
+static Oid
+LookupDistributedTableIdForShardId(uint64 shardId, bool *isReferenceTable)
+{
+	bool found;
+	Oid distributedTableId = InvalidOid;
+	ShardIdHashEntry *entry = (ShardIdHashEntry *) hash_search(shardToDistributedTableMap,
+															   &shardId,
+															   HASH_ENTER,
+															   &found);
+	if (found)
+	{
+		distributedTableId = entry->distributedTableId;
+	}
+	else
+	{
+		distributedTableId = AddShardIdToHashTable(shardId, entry);
+	}
+	*isReferenceTable = entry->isReferenceTable;
+	return distributedTableId;
+}
+
+
+/*
+ * replication_origin_filter_cb is a callback function that filters out publication
+ * of changes that originated from any node other than the current node. This is
+ * identified by the "origin_id" of the changes. The origin_id is set to
+ * a non-zero value in the origin node as part of WAL replication for internal
+ * operations like shard split/moves/create_distributed_table etc.
+ */
+static bool
+replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId origin_id)
+{
+	return (origin_id != InvalidRepOriginId);
+}
+
+
+/*
+ * TranslateAndPublishRelationForCDC translates the changes in the shard table to
+ * the changes in the shell table and publishes them as a change to the
+ * distributed table so that CDC clients are not aware of the shard tables. It also
+ * handles schema changes to the distributed table.
+ */
+static void
+TranslateAndPublishRelationForCDC(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+								  Relation relation, ReorderBufferChange *change,
+								  Oid shardId, Oid targetRelationid)
+{
+	/* Get the distributed table's relation for this shard. */
+	Relation targetRelation = RelationIdGetRelation(targetRelationid);
+
+	/*
+	 * Check if there has been a schema change (such as a dropped column), by comparing
+	 * the number of attributes in the shard table and the shell table.
+	 */
+	TranslateChangesIfSchemaChanged(relation, targetRelation, change);
+
+	/*
+	 * Publish the change to the shard table as the change in the distributed table,
+	 * so that the CDC client can see the change in the distributed table,
+	 * instead of the shard table, by calling the pgoutput's callback function.
+	 */
+	ouputPluginChangeCB(ctx, txn, targetRelation, change);
+	RelationClose(targetRelation);
+}
+
+
+/*
+ * PublishDistributedTableChanges publishes a change on a shard as a change on
+ * the distributed table that owns the shard. It skips system relations and
+ * non-distributed tables are passed through to the base plugin unchanged.
+ */
+static void
+PublishDistributedTableChanges(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+							   Relation relation, ReorderBufferChange *change)
+{
+	char *shardRelationName = RelationGetRelationName(relation);
+
+	/* Skip publishing CDC changes for any system relations in pg_catalog */
+	if (relation->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
+	{
+		return;
+	}
+
+	/* Check if the relation is a distributed table by checking for shard name. */
+	uint64 shardId = CdcExtractShardIdFromTableName(shardRelationName, true);
+
+	/* If this relation is not distributed, call the pgoutput's callback and return. */
+	if (shardId == INVALID_SHARD_ID)
+	{
+		ouputPluginChangeCB(ctx, txn, relation, change);
+		return;
+	}
+
+	bool isReferenceTable = false;
+	Oid distRelationId = LookupDistributedTableIdForShardId(shardId, &isReferenceTable);
+	if (distRelationId == InvalidOid)
+	{
+		ouputPluginChangeCB(ctx, txn, relation, change);
+		return;
+	}
+
+	/* Publish changes for reference table only from the coordinator node. */
+	if (isReferenceTable && !CdcIsCoordinator())
+	{
+		return;
+	}
+
+	/* translate and publish from shard relation to distributed table relation for CDC */
+	TranslateAndPublishRelationForCDC(ctx, txn, relation, change, shardId,
+									  distRelationId);
+}
+
+
+/*
+ * GetTupleForTargetSchemaForCdc returns a heap tuple with the data from sourceRelationTuple
+ * to match the schema in targetRelDesc. Either or both source and target relations may have
+ * dropped columns. This function handles it by adding NULL values for dropped columns in
+ * target relation and skipping dropped columns in source relation. It returns a heap tuple
+ * adjusted to the current schema of the target relation.
+ */
+static HeapTuple
+GetTupleForTargetSchemaForCdc(HeapTuple sourceRelationTuple,
+							  TupleDesc sourceRelDesc,
+							  TupleDesc targetRelDesc)
+{
+	/* Allocate memory for sourceValues and sourceNulls arrays. */
+	Datum *sourceValues = (Datum *) palloc0(sourceRelDesc->natts * sizeof(Datum));
+	bool *sourceNulls = (bool *) palloc0(sourceRelDesc->natts * sizeof(bool));
+
+	/* Deform the source tuple to sourceValues and sourceNulls arrays. */
+	heap_deform_tuple(sourceRelationTuple, sourceRelDesc, sourceValues,
+					  sourceNulls);
+
+	/* This is the next field to read in the source relation */
+	uint32 sourceIndex = 0;
+	uint32 targetIndex = 0;
+
+	/* Allocate memory for targetValues and targetNulls arrays. */
+	Datum *targetValues = (Datum *) palloc0(targetRelDesc->natts * sizeof(Datum));
+	bool *targetNulls = (bool *) palloc0(targetRelDesc->natts * sizeof(bool));
+
+	/* Loop through all source and target attributes one by one and handle any dropped attributes. */
+	while (targetIndex < targetRelDesc->natts)
+	{
+		/* If this target attribute has been dropped, add a NULL attribute in targetValues and continue. */
+		if (TupleDescAttr(targetRelDesc, targetIndex)->attisdropped)
+		{
+			Datum nullDatum = (Datum) 0;
+			targetValues[targetIndex] = nullDatum;
+			targetNulls[targetIndex] = true;
+			targetIndex++;
+		}
+		/* If this source attribute has been dropped, just skip this source attribute. */
+		else if (TupleDescAttr(sourceRelDesc, sourceIndex)->attisdropped)
+		{
+			sourceIndex++;
+			continue;
+		}
+		/* If both source and target attributes are not dropped, add the attribute field to targetValues. */
+		else if (sourceIndex < sourceRelDesc->natts)
+		{
+			targetValues[targetIndex] = sourceValues[sourceIndex];
+			targetNulls[targetIndex] = sourceNulls[sourceIndex];
+			sourceIndex++;
+			targetIndex++;
+		}
+		else
+		{
+			/* If there are no more source fields, add a NULL field in targetValues. */
+			Datum nullDatum = (Datum) 0;
+			targetValues[targetIndex] = nullDatum;
+			targetNulls[targetIndex] = true;
+			targetIndex++;
+		}
+	}
+
+	/* Form a new tuple from the target values created by the above loop. */
+	HeapTuple targetRelationTuple = heap_form_tuple(targetRelDesc, targetValues,
+													targetNulls);
+	return targetRelationTuple;
+}
+
+
+/* HasSchemaChanged returns true if there are any schema changes between source and target relations. */
+static bool
+HasSchemaChanged(TupleDesc sourceRelationDesc, TupleDesc targetRelationDesc)
+{
+	bool hasSchemaChanged = (sourceRelationDesc->natts != targetRelationDesc->natts);
+	if (hasSchemaChanged)
+	{
+		return true;
+	}
+
+	for (uint32 i = 0; i < sourceRelationDesc->natts; i++)
+	{
+		if (TupleDescAttr(sourceRelationDesc, i)->attisdropped ||
+			TupleDescAttr(targetRelationDesc, i)->attisdropped)
+		{
+			hasSchemaChanged = true;
+			break;
+		}
+	}
+
+	return hasSchemaChanged;
+}
+
+
+/*
+ * TranslateChangesIfSchemaChanged translates the tuples in a ReorderBufferChange
+ * if there is a schema change between source and target relations.
+ */
+static void
+TranslateChangesIfSchemaChanged(Relation sourceRelation, Relation targetRelation,
+								ReorderBufferChange *change)
+{
+	TupleDesc sourceRelationDesc = RelationGetDescr(sourceRelation);
+	TupleDesc targetRelationDesc = RelationGetDescr(targetRelation);
+
+	/* if there are no changes between source and target relations, return */
+	if (!HasSchemaChanged(sourceRelationDesc, targetRelationDesc))
+	{
+		return;
+	}
+
+	/* Check the ReorderBufferChange's action type and handle them accordingly. */
+	switch (change->action)
+	{
+		case REORDER_BUFFER_CHANGE_INSERT:
+		{
+			/* For insert action, only the new tuple needs to be translated. */
+			HeapTuple sourceRelationNewTuple = &(change->data.tp.newtuple->tuple);
+			HeapTuple targetRelationNewTuple = GetTupleForTargetSchemaForCdc(
+				sourceRelationNewTuple, sourceRelationDesc, targetRelationDesc);
+			change->data.tp.newtuple->tuple = *targetRelationNewTuple;
+			break;
+		}
+
+		/*
+		 * For update changes both old and new tuples need to be translated for target relation
+		 * if the REPLICA IDENTITY is set to FULL. Otherwise, only the new tuple needs to be
+		 * translated for target relation.
+		 */
+		case REORDER_BUFFER_CHANGE_UPDATE:
+		{
+			/* For update action, the new tuple should always be translated. */
+			/* Get the new tuple from the ReorderBufferChange, and translate it to target relation. */
+			HeapTuple sourceRelationNewTuple = &(change->data.tp.newtuple->tuple);
+			HeapTuple targetRelationNewTuple = GetTupleForTargetSchemaForCdc(
+				sourceRelationNewTuple, sourceRelationDesc, targetRelationDesc);
+			change->data.tp.newtuple->tuple = *targetRelationNewTuple;
+
+			/*
+			 * Format oldtuple according to the target relation. If the column values of replica
+			 * identity change, then the old tuple is non-null and needs to be formatted according
+			 * to the target relation schema.
+			 */
+			if (change->data.tp.oldtuple != NULL)
+			{
+				HeapTuple sourceRelationOldTuple = &(change->data.tp.oldtuple->tuple);
+				HeapTuple targetRelationOldTuple = GetTupleForTargetSchemaForCdc(
+					sourceRelationOldTuple,
+					sourceRelationDesc,
+					targetRelationDesc);
+
+				change->data.tp.oldtuple->tuple = *targetRelationOldTuple;
+			}
+			break;
+		}
+
+		case REORDER_BUFFER_CHANGE_DELETE:
+		{
+			/* For delete action, only the old tuple needs to be translated. */
+			HeapTuple sourceRelationOldTuple = &(change->data.tp.oldtuple->tuple);
+			HeapTuple targetRelationOldTuple = GetTupleForTargetSchemaForCdc(
+				sourceRelationOldTuple,
+				sourceRelationDesc,
+				targetRelationDesc);
+
+			change->data.tp.oldtuple->tuple = *targetRelationOldTuple;
+			break;
+		}
+
+		default:
+		{
+			/* Do nothing for other action types. */
+			break;
+		}
+	}
+}
@ -0,0 +1,432 @@
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* cdc_decoder_utils.c
|
||||||
|
* CDC Decoder plugin utility functions for Citus
|
||||||
|
*
|
||||||
|
* Copyright (c) Citus Data, Inc.
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
#include "commands/extension.h"
|
||||||
|
#include "fmgr.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "access/genam.h"
|
||||||
|
#include "access/heapam.h"
|
||||||
|
#include "common/hashfn.h"
|
||||||
|
#include "common/string.h"
|
||||||
|
#include "utils/fmgroids.h"
|
||||||
|
#include "utils/typcache.h"
|
||||||
|
#include "utils/lsyscache.h"
|
||||||
|
#include "catalog/pg_namespace.h"
|
||||||
|
#include "cdc_decoder_utils.h"
|
||||||
|
#include "distributed/pg_dist_partition.h"
|
||||||
|
#include "distributed/pg_dist_shard.h"
|
||||||
|
#include "distributed/relay_utility.h"
|
||||||
|
|
||||||
|
static int32 LocalGroupId = -1;
|
||||||
|
static Oid PgDistLocalGroupRelationId = InvalidOid;
|
||||||
|
static Oid PgDistShardRelationId = InvalidOid;
|
||||||
|
static Oid PgDistShardShardidIndexId = InvalidOid;
|
||||||
|
static Oid PgDistPartitionRelationId = InvalidOid;
|
||||||
|
static Oid PgDistPartitionLogicalrelidIndexId = InvalidOid;
|
||||||
|
static bool IsCitusExtensionLoaded = false;
|
||||||
|
|
||||||
|
#define COORDINATOR_GROUP_ID 0
|
||||||
|
#define InvalidRepOriginId 0
|
||||||
|
#define Anum_pg_dist_local_groupid 1
|
||||||
|
#define GROUP_ID_UPGRADING -2
|
||||||
|
|
||||||
|
|
||||||
|
static Oid DistLocalGroupIdRelationId(void);
|
||||||
|
static int32 CdcGetLocalGroupId(void);
|
||||||
|
static HeapTuple CdcPgDistPartitionTupleViaCatalog(Oid relationId);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DistLocalGroupIdRelationId returns the relation id of the pg_dist_local_group
|
||||||
|
*/
|
||||||
|
static Oid
|
||||||
|
DistLocalGroupIdRelationId(void)
|
||||||
|
{
|
||||||
|
if (PgDistLocalGroupRelationId == InvalidOid)
|
||||||
|
{
|
||||||
|
PgDistLocalGroupRelationId = get_relname_relid("pg_dist_local_group",
|
||||||
|
PG_CATALOG_NAMESPACE);
|
||||||
|
}
|
||||||
|
return PgDistLocalGroupRelationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DistShardRelationId returns the relation id of the pg_dist_shard
|
||||||
|
*/
|
||||||
|
static Oid
|
||||||
|
DistShardRelationId(void)
|
||||||
|
{
|
||||||
|
if (PgDistShardRelationId == InvalidOid)
|
||||||
|
{
|
||||||
|
PgDistShardRelationId = get_relname_relid("pg_dist_shard", PG_CATALOG_NAMESPACE);
|
||||||
|
}
|
||||||
|
return PgDistShardRelationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DistShardRelationId returns the relation id of the pg_dist_shard
|
||||||
|
*/
|
||||||
|
static Oid
|
||||||
|
DistShardShardidIndexId(void)
|
||||||
|
{
|
||||||
|
if (PgDistShardShardidIndexId == InvalidOid)
|
||||||
|
{
|
||||||
|
PgDistShardShardidIndexId = get_relname_relid("pg_dist_shard_shardid_index",
|
||||||
|
PG_CATALOG_NAMESPACE);
|
||||||
|
}
|
||||||
|
return PgDistShardShardidIndexId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DistShardRelationId returns the relation id of the pg_dist_shard
|
||||||
|
*/
|
||||||
|
static Oid
|
||||||
|
DistPartitionRelationId(void)
|
||||||
|
{
|
||||||
|
if (PgDistPartitionRelationId == InvalidOid)
|
||||||
|
{
|
||||||
|
PgDistPartitionRelationId = get_relname_relid("pg_dist_partition",
|
||||||
|
PG_CATALOG_NAMESPACE);
|
||||||
|
}
|
||||||
|
return PgDistPartitionRelationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static Oid
|
||||||
|
DistPartitionLogicalRelidIndexId(void)
|
||||||
|
{
|
||||||
|
if (PgDistPartitionLogicalrelidIndexId == InvalidOid)
|
||||||
|
{
|
||||||
|
PgDistPartitionLogicalrelidIndexId = get_relname_relid(
|
||||||
|
"pg_dist_partition_logicalrelid_index", PG_CATALOG_NAMESPACE);
|
||||||
|
}
|
||||||
|
return PgDistPartitionLogicalrelidIndexId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CdcIsCoordinator function returns true if this node is identified as the
|
||||||
|
* schema/coordinator/master node of the cluster.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
CdcIsCoordinator(void)
|
||||||
|
{
|
||||||
|
return (CdcGetLocalGroupId() == COORDINATOR_GROUP_ID);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CdcCitusHasBeenLoaded function returns true if the citus extension has been loaded.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
CdcCitusHasBeenLoaded()
|
||||||
|
{
|
||||||
|
if (!IsCitusExtensionLoaded)
|
||||||
|
{
|
||||||
|
IsCitusExtensionLoaded = (get_extension_oid("citus", true) != InvalidOid);
|
||||||
|
}
|
||||||
|
|
||||||
|
return IsCitusExtensionLoaded;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ExtractShardIdFromTableName tries to extract shard id from the given table name,
|
||||||
|
* and returns the shard id if table name is formatted as shard name.
|
||||||
|
* Else, the function returns INVALID_SHARD_ID.
|
||||||
|
*/
|
||||||
|
uint64
|
||||||
|
CdcExtractShardIdFromTableName(const char *tableName, bool missingOk)
|
||||||
|
{
|
||||||
|
char *shardIdStringEnd = NULL;
|
||||||
|
|
||||||
|
/* find the last underscore and increment for shardId string */
|
||||||
|
char *shardIdString = strrchr(tableName, SHARD_NAME_SEPARATOR);
|
||||||
|
if (shardIdString == NULL && !missingOk)
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errmsg("could not extract shardId from table name \"%s\"",
|
||||||
|
tableName)));
|
||||||
|
}
|
||||||
|
else if (shardIdString == NULL && missingOk)
|
||||||
|
{
|
||||||
|
return INVALID_SHARD_ID;
|
||||||
|
}
|
||||||
|
|
||||||
|
shardIdString++;
|
||||||
|
|
||||||
|
errno = 0;
|
||||||
|
uint64 shardId = strtoull(shardIdString, &shardIdStringEnd, 0);
|
||||||
|
|
||||||
|
if (errno != 0 || (*shardIdStringEnd != '\0'))
|
||||||
|
{
|
||||||
|
if (!missingOk)
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errmsg("could not extract shardId from table name \"%s\"",
|
||||||
|
tableName)));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return INVALID_SHARD_ID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return shardId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CdcGetLocalGroupId returns the group identifier of the local node. The function assumes
|
||||||
|
* that pg_dist_local_node_group has exactly one row and has at least one column.
|
||||||
|
* Otherwise, the function errors out.
|
||||||
|
*/
|
||||||
|
static int32
|
||||||
|
CdcGetLocalGroupId(void)
|
||||||
|
{
|
||||||
|
ScanKeyData scanKey[1];
|
||||||
|
int scanKeyCount = 0;
|
||||||
|
int32 groupId = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Already set the group id, no need to read the heap again.
|
||||||
|
*/
|
||||||
|
if (LocalGroupId != -1)
|
||||||
|
{
|
||||||
|
return LocalGroupId;
|
||||||
|
}
|
||||||
|
|
||||||
|
Oid localGroupTableOid = DistLocalGroupIdRelationId();
|
||||||
|
if (localGroupTableOid == InvalidOid)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Relation pgDistLocalGroupId = table_open(localGroupTableOid, AccessShareLock);
|
||||||
|
|
||||||
|
SysScanDesc scanDescriptor = systable_beginscan(pgDistLocalGroupId,
|
||||||
|
InvalidOid, false,
|
||||||
|
NULL, scanKeyCount, scanKey);
|
||||||
|
|
||||||
|
TupleDesc tupleDescriptor = RelationGetDescr(pgDistLocalGroupId);
|
||||||
|
|
||||||
|
HeapTuple heapTuple = systable_getnext(scanDescriptor);
|
||||||
|
|
||||||
|
if (HeapTupleIsValid(heapTuple))
|
||||||
|
{
|
||||||
|
bool isNull = false;
|
||||||
|
Datum groupIdDatum = heap_getattr(heapTuple,
|
||||||
|
Anum_pg_dist_local_groupid,
|
||||||
|
tupleDescriptor, &isNull);
|
||||||
|
|
||||||
|
groupId = DatumGetInt32(groupIdDatum);
|
||||||
|
|
||||||
|
/* set the local cache variable */
|
||||||
|
LocalGroupId = groupId;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Upgrade is happening. When upgrading postgres, pg_dist_local_group is
|
||||||
|
* temporarily empty before citus_finish_pg_upgrade() finishes execution.
|
||||||
|
*/
|
||||||
|
groupId = GROUP_ID_UPGRADING;
|
||||||
|
}
|
||||||
|
|
||||||
|
systable_endscan(scanDescriptor);
|
||||||
|
table_close(pgDistLocalGroupId, AccessShareLock);
|
||||||
|
|
||||||
|
return groupId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CdcLookupShardRelationFromCatalog returns the logical relation oid a shard belongs to.
|
||||||
|
*
|
||||||
|
* Errors out if the shardId does not exist and missingOk is false.
|
||||||
|
* Returns InvalidOid if the shardId does not exist and missingOk is true.
|
||||||
|
*/
|
||||||
|
Oid
|
||||||
|
CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk)
|
||||||
|
{
|
||||||
|
ScanKeyData scanKey[1];
|
||||||
|
int scanKeyCount = 1;
|
||||||
|
Form_pg_dist_shard shardForm = NULL;
|
||||||
|
Relation pgDistShard = table_open(DistShardRelationId(), AccessShareLock);
|
||||||
|
Oid relationId = InvalidOid;
|
||||||
|
|
||||||
|
ScanKeyInit(&scanKey[0], Anum_pg_dist_shard_shardid,
|
||||||
|
BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(shardId));
|
||||||
|
|
||||||
|
SysScanDesc scanDescriptor = systable_beginscan(pgDistShard,
|
||||||
|
DistShardShardidIndexId(), true,
|
||||||
|
NULL, scanKeyCount, scanKey);
|
||||||
|
|
||||||
|
HeapTuple heapTuple = systable_getnext(scanDescriptor);
|
||||||
|
if (!HeapTupleIsValid(heapTuple) && !missingOk)
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errmsg("could not find valid entry for shard "
|
||||||
|
UINT64_FORMAT, shardId)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!HeapTupleIsValid(heapTuple))
|
||||||
|
{
|
||||||
|
relationId = InvalidOid;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
shardForm = (Form_pg_dist_shard) GETSTRUCT(heapTuple);
|
||||||
|
relationId = shardForm->logicalrelid;
|
||||||
|
}
|
||||||
|
|
||||||
|
systable_endscan(scanDescriptor);
|
||||||
|
table_close(pgDistShard, NoLock);
|
||||||
|
|
||||||
|
return relationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * CdcPgDistPartitionTupleViaCatalog is a helper function that searches
 * pg_dist_partition for the given relationId. The caller is responsible
 * for ensuring that the returned heap tuple is valid before accessing
 * its fields.
 */
static HeapTuple
CdcPgDistPartitionTupleViaCatalog(Oid relationId)
{
	const int scanKeyCount = 1;
	ScanKeyData scanKey[1];
	bool indexOK = true;

	Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);

	ScanKeyInit(&scanKey[0], Anum_pg_dist_partition_logicalrelid,
				BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));

	SysScanDesc scanDescriptor = systable_beginscan(pgDistPartition,
													DistPartitionLogicalRelidIndexId(),
													indexOK, NULL, scanKeyCount, scanKey);

	HeapTuple partitionTuple = systable_getnext(scanDescriptor);

	if (HeapTupleIsValid(partitionTuple))
	{
		/* callers should have the tuple in their memory contexts */
		partitionTuple = heap_copytuple(partitionTuple);
	}

	systable_endscan(scanDescriptor);
	table_close(pgDistPartition, AccessShareLock);

	return partitionTuple;
}


/*
 * CdcPartitionMethodViaCatalog gets a relationId and returns the partition
 * method column from pg_dist_partition via reading from catalog.
 */
char
CdcPartitionMethodViaCatalog(Oid relationId)
{
	HeapTuple partitionTuple = CdcPgDistPartitionTupleViaCatalog(relationId);
	if (!HeapTupleIsValid(partitionTuple))
	{
		return DISTRIBUTE_BY_INVALID;
	}

	Datum datumArray[Natts_pg_dist_partition];
	bool isNullArray[Natts_pg_dist_partition];

	Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);

	TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
	heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);

	if (isNullArray[Anum_pg_dist_partition_partmethod - 1])
	{
		/* partition method cannot be NULL, still let's make sure */
		heap_freetuple(partitionTuple);
		table_close(pgDistPartition, NoLock);
		return DISTRIBUTE_BY_INVALID;
	}

	Datum partitionMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1];
	char partitionMethodChar = DatumGetChar(partitionMethodDatum);

	heap_freetuple(partitionTuple);
	table_close(pgDistPartition, NoLock);

	return partitionMethodChar;
}

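As a usage sketch (not code from this patch): a decoder can branch on the returned partition method using the DISTRIBUTE_BY_* constants, with DISTRIBUTE_BY_INVALID doubling as the "not a Citus table" answer.

	/* hedged sketch: classify a relation before deciding how to decode it */
	char partitionMethod = CdcPartitionMethodViaCatalog(relationId);
	if (partitionMethod == DISTRIBUTE_BY_INVALID)
	{
		/* not registered in pg_dist_partition: treat as a regular table */
	}
	else if (partitionMethod == DISTRIBUTE_BY_NONE)
	{
		/* reference table: every node holds a full copy of the data */
	}
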
/*
 * RemoveCitusDecodersFromPaths removes a path ending in citus_decoders
 * from the given input paths.
 */
char *
RemoveCitusDecodersFromPaths(char *paths)
{
	if (strlen(paths) == 0)
	{
		/* dynamic_library_path is empty */
		return paths;
	}

	StringInfo newPaths = makeStringInfo();

	char *remainingPaths = paths;

	for (;;)
	{
		int pathLength = 0;

		char *pathStart = first_path_var_separator(remainingPaths);
		if (pathStart == remainingPaths)
		{
			/*
			 * This will error out in find_in_dynamic_libpath, return
			 * original value here.
			 */
			return paths;
		}
		else if (pathStart == NULL)
		{
			/* final path */
			pathLength = strlen(remainingPaths);
		}
		else
		{
			/* more paths remaining */
			pathLength = pathStart - remainingPaths;
		}

		char *currentPath = palloc(pathLength + 1);
		strlcpy(currentPath, remainingPaths, pathLength + 1);
		canonicalize_path(currentPath);

		if (!pg_str_endswith(currentPath, "/citus_decoders"))
		{
			appendStringInfo(newPaths, "%s%s", newPaths->len > 0 ? ":" : "", currentPath);
		}

		if (remainingPaths[pathLength] == '\0')
		{
			/* end of string */
			break;
		}

		remainingPaths += pathLength + 1;
	}

	return newPaths->data;
}

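The loop above is easy to exercise in isolation. The standalone sketch below reimplements the core idea with plain libc (no StringInfo, no canonicalize_path, ':' assumed as the separator); it is a simplified analogue for experimentation, not the shipped code.

#include <stdio.h>
#include <string.h>

/*
 * Simplified analogue of RemoveCitusDecodersFromPaths: copy a colon-separated
 * path list into `out`, dropping entries that end in `suffix`. Assumes `out`
 * is at least as large as `paths`.
 */
static void
remove_suffix_entries(const char *paths, const char *suffix, char *out)
{
	size_t suffixlen = strlen(suffix);
	size_t outpos = 0;
	const char *p = paths;

	for (;;)
	{
		const char *sep = strchr(p, ':');
		size_t len = sep ? (size_t) (sep - p) : strlen(p);

		/* keep the entry unless it ends in the given suffix */
		if (!(len >= suffixlen &&
			  strncmp(p + len - suffixlen, suffix, suffixlen) == 0))
		{
			if (outpos > 0)
			{
				out[outpos++] = ':';
			}
			memcpy(out + outpos, p, len);
			outpos += len;
		}

		if (!sep)
		{
			break;
		}
		p = sep + 1;
	}
	out[outpos] = '\0';
}

int
main(void)
{
	char result[128];
	remove_suffix_entries("/usr/pglib:/usr/pglib/citus_decoders:/opt/lib",
						  "/citus_decoders", result);
	printf("%s\n", result);	/* prints /usr/pglib:/opt/lib */
	return 0;
}
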
@@ -0,0 +1,34 @@
/*-------------------------------------------------------------------------
 *
 * cdc_decoder_utils.h
 *	  Utility functions and declarations for cdc decoder.
 *
 * Copyright (c) Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */

#ifndef CITUS_CDC_DECODER_H
#define CITUS_CDC_DECODER_H

#include "postgres.h"
#include "fmgr.h"
#include "replication/logical.h"
#include "c.h"

#define InvalidRepOriginId 0
#define INVALID_SHARD_ID 0

bool CdcIsCoordinator(void);

uint64 CdcExtractShardIdFromTableName(const char *tableName, bool missingOk);

Oid CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk);

char CdcPartitionMethodViaCatalog(Oid relationId);

bool CdcCitusHasBeenLoaded(void);

char * RemoveCitusDecodersFromPaths(char *paths);

#endif /* CITUS_CDC_DECODER_H */
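Read together, these declarations sketch the per-change flow of a CDC output plugin. The composition below is illustrative only: the shard-suffixed table name and the variable names are assumptions, not code from this patch.

	/* hedged sketch: map a change on a shard back to the distributed table */
	const char *tableName = "events_102008";	/* hypothetical shard name */
	bool missingOk = true;

	uint64 shardId = CdcExtractShardIdFromTableName(tableName, missingOk);
	if (shardId != INVALID_SHARD_ID)
	{
		Oid ownerRelationId = CdcLookupShardRelationFromCatalog(shardId, missingOk);
		/* remap the tuple to ownerRelationId before handing it to the client */
	}
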
@@ -55,6 +55,7 @@
 #include "distributed/multi_partitioning_utils.h"
 #include "distributed/reference_table_utils.h"
 #include "distributed/relation_access_tracking.h"
+#include "distributed/replication_origin_session_utils.h"
 #include "distributed/shared_library_init.h"
 #include "distributed/shard_utils.h"
 #include "distributed/worker_protocol.h"
@@ -183,6 +184,7 @@ static TableConversionReturn * AlterDistributedTable(TableConversionParameters *
 static TableConversionReturn * AlterTableSetAccessMethod(
 	TableConversionParameters *params);
 static TableConversionReturn * ConvertTable(TableConversionState *con);
+static TableConversionReturn * ConvertTableInternal(TableConversionState *con);
 static bool SwitchToSequentialAndLocalExecutionIfShardNameTooLong(char *relationName,
 																  char *longestShardName);
 static void DropIndexesNotSupportedByColumnar(Oid relationId,
@@ -215,7 +217,10 @@ static bool WillRecreateForeignKeyToReferenceTable(Oid relationId,
 												   CascadeToColocatedOption cascadeOption);
 static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId);
 static void ErrorIfUnsupportedCascadeObjects(Oid relationId);
+static List * WrapTableDDLCommands(List *commandStrings);
 static bool DoesCascadeDropUnsupportedObject(Oid classId, Oid id, HTAB *nodeMap);
+static TableConversionReturn * CopyTableConversionReturnIntoCurrentContext(
+	TableConversionReturn *tableConversionReturn);
 
 PG_FUNCTION_INFO_V1(undistribute_table);
 PG_FUNCTION_INFO_V1(alter_distributed_table);
@@ -402,7 +407,11 @@ UndistributeTable(TableConversionParameters *params)
 	params->conversionType = UNDISTRIBUTE_TABLE;
 	params->shardCountIsNull = true;
 	TableConversionState *con = CreateTableConversion(params);
-	return ConvertTable(con);
+
+	SetupReplicationOriginLocalSession();
+	TableConversionReturn *conv = ConvertTable(con);
+	ResetReplicationOriginLocalSession();
+	return conv;
 }
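A hedged aside: if ConvertTable can error out part-way, the reset call above is skipped on the error path. One way to harden that, sketched under the assumption that ordinary PG_TRY semantics apply here, is a PG_FINALLY guard; this is a suggestion, not what the patch does.

	SetupReplicationOriginLocalSession();
	TableConversionReturn *conv = NULL;
	PG_TRY();
	{
		conv = ConvertTable(con);
	}
	PG_FINALLY();
	{
		/* runs on both the success and the error path */
		ResetReplicationOriginLocalSession();
	}
	PG_END_TRY();
	return conv;
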
@@ -441,6 +450,7 @@ AlterDistributedTable(TableConversionParameters *params)
 		ereport(DEBUG1, (errmsg("setting multi shard modify mode to sequential")));
 		SetLocalMultiShardModifyModeToSequential();
 	}
+
 	return ConvertTable(con);
 }
@@ -511,9 +521,9 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
 
 
 /*
- * ConvertTable is used for converting a table into a new table with different properties.
- * The conversion is done by creating a new table, moving everything to the new table and
- * dropping the old one. So the oid of the table is not preserved.
+ * ConvertTableInternal is used for converting a table into a new table with different
+ * properties. The conversion is done by creating a new table, moving everything to the
+ * new table and dropping the old one. So the oid of the table is not preserved.
  *
  * The new table will have the same name, columns and rows. It will also have partitions,
  * views, sequences of the old table. Finally it will have everything created by
@@ -532,7 +542,7 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
  * in case you add a new way to return from this function.
  */
 TableConversionReturn *
-ConvertTable(TableConversionState *con)
+ConvertTableInternal(TableConversionState *con)
 {
 	InTableTypeConversionFunctionCall = true;
@@ -595,9 +605,18 @@ ConvertTable(TableConversionState *con)
 	List *justBeforeDropCommands = NIL;
 	List *attachPartitionCommands = NIL;
 
-	postLoadCommands =
-		list_concat(postLoadCommands,
-					GetViewCreationTableDDLCommandsOfTable(con->relationId));
+	List *createViewCommands = GetViewCreationCommandsOfTable(con->relationId);
+
+	postLoadCommands = list_concat(postLoadCommands,
+								   WrapTableDDLCommands(createViewCommands));
+
+	/* need to add back to publications after dropping the original table */
+	bool isAdd = true;
+	List *alterPublicationCommands =
+		GetAlterPublicationDDLCommandsForTable(con->relationId, isAdd);
+
+	postLoadCommands = list_concat(postLoadCommands,
+								   WrapTableDDLCommands(alterPublicationCommands));
+
 	List *foreignKeyCommands = NIL;
 	if (con->conversionType == ALTER_DISTRIBUTED_TABLE)
@@ -800,9 +819,21 @@ ConvertTable(TableConversionState *con)
 		ExecuteQueryViaSPI(tableConstructionSQL, SPI_OK_UTILITY);
 	}
 
+	/*
+	 * when there are many partitions, each call to ProcessUtilityParseTree
+	 * accumulates used memory. Free context after each call.
+	 */
+	MemoryContext citusPerPartitionContext =
+		AllocSetContextCreate(CurrentMemoryContext,
+							  "citus_per_partition_context",
+							  ALLOCSET_DEFAULT_SIZES);
+	MemoryContext oldContext = MemoryContextSwitchTo(citusPerPartitionContext);
+
 	char *attachPartitionCommand = NULL;
 	foreach_ptr(attachPartitionCommand, attachPartitionCommands)
 	{
+		MemoryContextReset(citusPerPartitionContext);
+
 		Node *parseTree = ParseTreeNode(attachPartitionCommand);
 
 		ProcessUtilityParseTree(parseTree, attachPartitionCommand,
@@ -810,6 +841,9 @@ ConvertTable(TableConversionState *con)
 							NULL, None_Receiver, NULL);
 	}
 
+	MemoryContextSwitchTo(oldContext);
+	MemoryContextDelete(citusPerPartitionContext);
+
 	if (isPartitionTable)
 	{
 		ExecuteQueryViaSPI(attachToParentCommand, SPI_OK_UTILITY);
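The context-per-iteration pattern introduced above generalizes to any loop whose body allocates in CurrentMemoryContext. A minimal hedged template using the usual PostgreSQL memory-context API (itemList and ProcessItem are placeholders):

	MemoryContext loopContext = AllocSetContextCreate(CurrentMemoryContext,
													  "loop_context",
													  ALLOCSET_DEFAULT_SIZES);
	MemoryContext oldLoopContext = MemoryContextSwitchTo(loopContext);

	char *item = NULL;
	foreach_ptr(item, itemList)
	{
		/* free whatever the previous iteration palloc'd */
		MemoryContextReset(loopContext);

		ProcessItem(item);	/* placeholder for the per-item work */
	}

	MemoryContextSwitchTo(oldLoopContext);
	MemoryContextDelete(loopContext);
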
@@ -869,10 +903,77 @@ ConvertTable(TableConversionState *con)
 	SetLocalEnableLocalReferenceForeignKeys(oldEnableLocalReferenceForeignKeys);
 
 	InTableTypeConversionFunctionCall = false;
 
 	return ret;
 }
 
 
+/*
+ * CopyTableConversionReturnIntoCurrentContext copies given tableConversionReturn
+ * into CurrentMemoryContext.
+ */
+static TableConversionReturn *
+CopyTableConversionReturnIntoCurrentContext(TableConversionReturn *tableConversionReturn)
+{
+	TableConversionReturn *tableConversionReturnCopy = NULL;
+	if (tableConversionReturn)
+	{
+		tableConversionReturnCopy = palloc0(sizeof(TableConversionReturn));
+		List *copyForeignKeyCommands = NIL;
+		char *foreignKeyCommand = NULL;
+		foreach_ptr(foreignKeyCommand, tableConversionReturn->foreignKeyCommands)
+		{
+			char *copyForeignKeyCommand = MemoryContextStrdup(CurrentMemoryContext,
+															  foreignKeyCommand);
+			copyForeignKeyCommands = lappend(copyForeignKeyCommands,
+											 copyForeignKeyCommand);
+		}
+		tableConversionReturnCopy->foreignKeyCommands = copyForeignKeyCommands;
+	}
+
+	return tableConversionReturnCopy;
+}
+
+
+/*
+ * ConvertTable is a wrapper for ConvertTableInternal to persist only
+ * TableConversionReturn and delete all other allocations.
+ */
+static TableConversionReturn *
+ConvertTable(TableConversionState *con)
+{
+	/*
+	 * We do not allow alter_distributed_table and undistribute_table operations
+	 * for tables with identity columns. This is because we do not have a proper way
+	 * of keeping sequence states consistent across the cluster.
+	 */
+	ErrorIfTableHasIdentityColumn(con->relationId);
+
+	/*
+	 * when there are many partitions or colocated tables, memory usage is
+	 * accumulated. Free context for each call to ConvertTable.
+	 */
+	MemoryContext convertTableContext =
+		AllocSetContextCreate(CurrentMemoryContext,
+							  "citus_convert_table_context",
+							  ALLOCSET_DEFAULT_SIZES);
+	MemoryContext oldContext = MemoryContextSwitchTo(convertTableContext);
+
+	TableConversionReturn *tableConversionReturn = ConvertTableInternal(con);
+
+	MemoryContextSwitchTo(oldContext);
+
+	/* persist TableConversionReturn in oldContext */
+	TableConversionReturn *tableConversionReturnCopy =
+		CopyTableConversionReturnIntoCurrentContext(tableConversionReturn);
+
+	/* delete convertTableContext */
+	MemoryContextDelete(convertTableContext);
+
+	return tableConversionReturnCopy;
+}
+
+
 /*
  * DropIndexesNotSupportedByColumnar is a helper function used during access
  * method conversion to drop the indexes that are not supported by columnarAM.
||||||
}
|
}
|
||||||
else if (IsCitusTableType(con->relationId, REFERENCE_TABLE))
|
else if (IsCitusTableType(con->relationId, REFERENCE_TABLE))
|
||||||
{
|
{
|
||||||
CreateDistributedTable(con->newRelationId, NULL, DISTRIBUTE_BY_NONE, 0, false,
|
CreateReferenceTable(con->newRelationId);
|
||||||
NULL);
|
|
||||||
}
|
}
|
||||||
else if (IsCitusTableType(con->relationId, CITUS_LOCAL_TABLE))
|
else if (IsCitusTableType(con->relationId, CITUS_LOCAL_TABLE))
|
||||||
{
|
{
|
||||||
|
@@ -1410,17 +1510,16 @@ GetViewCreationCommandsOfTable(Oid relationId)
 
 
 /*
- * GetViewCreationTableDDLCommandsOfTable is the same as GetViewCreationCommandsOfTable,
- * but the returned list includes objects of TableDDLCommand's, not strings.
+ * WrapTableDDLCommands takes a list of command strings and wraps them
+ * in TableDDLCommand structs.
  */
-List *
-GetViewCreationTableDDLCommandsOfTable(Oid relationId)
+static List *
+WrapTableDDLCommands(List *commandStrings)
 {
-	List *commands = GetViewCreationCommandsOfTable(relationId);
 	List *tableDDLCommands = NIL;
 
 	char *command = NULL;
-	foreach_ptr(command, commands)
+	foreach_ptr(command, commandStrings)
 	{
 		tableDDLCommands = lappend(tableDDLCommands, makeTableDDLCommandString(command));
 	}
@@ -1523,96 +1622,6 @@ CreateMaterializedViewDDLCommand(Oid matViewOid)
 }
 
 
-/*
- * This function marks all the identity sequences as distributed on the given table.
- */
-static void
-MarkIdentitiesAsDistributed(Oid targetRelationId)
-{
-	Relation relation = relation_open(targetRelationId, AccessShareLock);
-	TupleDesc tupleDescriptor = RelationGetDescr(relation);
-	relation_close(relation, NoLock);
-
-	bool missingSequenceOk = false;
-
-	for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
-		 attributeIndex++)
-	{
-		Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
-
-		if (attributeForm->attidentity)
-		{
-			Oid seqOid = getIdentitySequence(targetRelationId, attributeForm->attnum,
-											 missingSequenceOk);
-
-			ObjectAddress seqAddress = { 0 };
-			ObjectAddressSet(seqAddress, RelationRelationId, seqOid);
-			MarkObjectDistributed(&seqAddress);
-		}
-	}
-}
-
-
-/*
- * This function returns sql statements to rename identites on the given table
- */
-static void
-PrepareRenameIdentitiesCommands(Oid sourceRelationId, Oid targetRelationId,
-								List **outCoordinatorCommands, List **outWorkerCommands)
-{
-	Relation targetRelation = relation_open(targetRelationId, AccessShareLock);
-	TupleDesc targetTupleDescriptor = RelationGetDescr(targetRelation);
-	relation_close(targetRelation, NoLock);
-
-	bool missingSequenceOk = false;
-
-	for (int attributeIndex = 0; attributeIndex < targetTupleDescriptor->natts;
-		 attributeIndex++)
-	{
-		Form_pg_attribute attributeForm = TupleDescAttr(targetTupleDescriptor,
-														attributeIndex);
-
-		if (attributeForm->attidentity)
-		{
-			char *columnName = NameStr(attributeForm->attname);
-
-			Oid targetSequenceOid = getIdentitySequence(targetRelationId,
-														attributeForm->attnum,
-														missingSequenceOk);
-			char *targetSequenceName = generate_relation_name(targetSequenceOid, NIL);
-
-			Oid sourceSequenceOid = getIdentitySequence(sourceRelationId,
-														attributeForm->attnum,
-														missingSequenceOk);
-			char *sourceSequenceName = generate_relation_name(sourceSequenceOid, NIL);
-
-			/* to rename sequence on the coordinator */
-			*outCoordinatorCommands = lappend(*outCoordinatorCommands, psprintf(
-												  "SET citus.enable_ddl_propagation TO OFF; ALTER SEQUENCE %s RENAME TO %s; RESET citus.enable_ddl_propagation;",
-												  quote_identifier(targetSequenceName),
-												  quote_identifier(sourceSequenceName)));
-
-			/* update workers to use existing sequence and drop the new one generated by PG */
-			bool missingTableOk = true;
-			*outWorkerCommands = lappend(*outWorkerCommands,
-										 GetAlterColumnWithNextvalDefaultCmd(
-											 sourceSequenceOid, sourceRelationId,
-											 columnName, missingTableOk));
-
-			/* drop the sequence generated by identity column */
-			*outWorkerCommands = lappend(*outWorkerCommands, psprintf(
-											 "DROP SEQUENCE IF EXISTS %s",
-											 quote_identifier(targetSequenceName)));
-		}
-	}
-}
-
-
 /*
  * ReplaceTable replaces the source table with the target table.
  * It moves all the rows of the source table to target table with INSERT SELECT.
@@ -1671,24 +1680,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
 		ExecuteQueryViaSPI(query->data, SPI_OK_INSERT);
 	}
 
-	/*
-	 * Drop identity dependencies (sequences marked as DEPENDENCY_INTERNAL) on the workers
-	 * to keep their states after the source table is dropped.
-	 */
-	List *ownedIdentitySequences = getOwnedSequences_internal(sourceId, 0,
-															  DEPENDENCY_INTERNAL);
-	if (ownedIdentitySequences != NIL && ShouldSyncTableMetadata(sourceId))
-	{
-		char *qualifiedTableName = quote_qualified_identifier(schemaName, sourceName);
-		StringInfo command = makeStringInfo();
-
-		appendStringInfo(command,
-						 "SELECT pg_catalog.worker_drop_sequence_dependency(%s);",
-						 quote_literal_cstr(qualifiedTableName));
-
-		SendCommandToWorkersWithMetadata(command->data);
-	}
-
 	/*
 	 * Modify regular sequence dependencies (sequences marked as DEPENDENCY_AUTO)
 	 */
@@ -1748,23 +1739,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
 						 quote_qualified_identifier(schemaName, sourceName))));
 	}
 
-	/*
-	 * We need to prepare rename identities commands before dropping the original table,
-	 * otherwise we can't find the original names of the identity sequences.
-	 * We prepare separate commands for the coordinator and the workers because:
-	 * In the coordinator, we simply need to rename the identity sequences
-	 * to their names on the old table, because right now the identity
-	 * sequences have default names generated by Postgres with the creation of the new table
-	 * In the workers, we have not dropped the original identity sequences,
-	 * so what we do is we alter the columns and set their default to the
-	 * original identity sequences, and after that we drop the new sequences.
-	 */
-	List *coordinatorCommandsToRenameIdentites = NIL;
-	List *workerCommandsToRenameIdentites = NIL;
-	PrepareRenameIdentitiesCommands(sourceId, targetId,
-									&coordinatorCommandsToRenameIdentites,
-									&workerCommandsToRenameIdentites);
-
 	resetStringInfo(query);
 	appendStringInfo(query, "DROP %sTABLE %s CASCADE",
 					 IsForeignTable(sourceId) ? "FOREIGN " : "",
@@ -1782,27 +1756,6 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
 					 quote_qualified_identifier(schemaName, targetName),
 					 quote_identifier(sourceName));
 	ExecuteQueryViaSPI(query->data, SPI_OK_UTILITY);
-
-	char *coordinatorCommand = NULL;
-	foreach_ptr(coordinatorCommand, coordinatorCommandsToRenameIdentites)
-	{
-		ExecuteQueryViaSPI(coordinatorCommand, SPI_OK_UTILITY);
-	}
-
-	char *workerCommand = NULL;
-	foreach_ptr(workerCommand, workerCommandsToRenameIdentites)
-	{
-		SendCommandToWorkersWithMetadata(workerCommand);
-	}
-
-	/*
-	 * To preserve identity sequences states in case of redistributing the table again,
-	 * we don't drop them when we undistribute a table. To maintain consistency and
-	 * avoid future problems if we redistribute the table, we want to apply all changes happening to
-	 * the identity sequence in the coordinator to their corresponding sequences in the workers as well.
-	 * That's why we have to mark identity sequences as distributed
-	 */
-	MarkIdentitiesAsDistributed(targetId);
 }
@@ -85,6 +85,7 @@ static void DropRelationTruncateTriggers(Oid relationId);
 static char * GetDropTriggerCommand(Oid relationId, char *triggerName);
 static void DropViewsOnTable(Oid relationId);
 static void DropIdentitiesOnTable(Oid relationId);
+static void DropTableFromPublications(Oid relationId);
 static List * GetRenameStatsCommandList(List *statsOidList, uint64 shardId);
 static List * ReversedOidList(List *oidList);
 static void AppendExplicitIndexIdsToList(Form_pg_index indexForm,
@@ -338,6 +339,10 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
 	List *shellTableDDLEvents = GetShellTableDDLEventsForCitusLocalTable(relationId);
 	List *tableViewCreationCommands = GetViewCreationCommandsOfTable(relationId);
 
+	bool isAdd = true;
+	List *alterPublicationCommands =
+		GetAlterPublicationDDLCommandsForTable(relationId, isAdd);
+
 	char *relationName = get_rel_name(relationId);
 	Oid relationSchemaId = get_rel_namespace(relationId);
@@ -347,6 +352,12 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
 	 */
 	DropIdentitiesOnTable(relationId);
 
+	/*
+	 * We do not want the shard to be in the publication (subscribers are
+	 * unlikely to recognize it).
+	 */
+	DropTableFromPublications(relationId);
+
 	/* below we convert relation with relationId to the shard relation */
 	uint64 shardId = ConvertLocalTableToShard(relationId);
@@ -363,6 +374,11 @@ CreateCitusLocalTable(Oid relationId, bool cascadeViaForeignKeys, bool autoConve
 	 */
 	ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(tableViewCreationCommands);
 
+	/*
+	 * Execute the publication creation commands with the shell table.
+	 */
+	ExecuteAndLogUtilityCommandListInTableTypeConversionViaSPI(alterPublicationCommands);
+
 	/*
 	 * Set shellRelationId as the relation with relationId now points
 	 * to the shard relation.
@@ -1131,7 +1147,7 @@ DropIdentitiesOnTable(Oid relationId)
 {
 	Relation relation = relation_open(relationId, AccessShareLock);
 	TupleDesc tupleDescriptor = RelationGetDescr(relation);
-	relation_close(relation, NoLock);
+	List *dropCommandList = NIL;
 
 	for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
 		 attributeIndex++)
@@ -1151,15 +1167,38 @@ DropIdentitiesOnTable(Oid relationId)
 							 qualifiedTableName,
 							 columnName);
 
+			dropCommandList = lappend(dropCommandList, dropCommand->data);
+		}
+	}
+
+	relation_close(relation, NoLock);
+
+	char *dropCommand = NULL;
+	foreach_ptr(dropCommand, dropCommandList)
+	{
 		/*
 		 * We need to disable/enable ddl propagation for this command, to prevent
 		 * sending unnecessary ALTER COLUMN commands for partitions, to MX workers.
 		 */
 		ExecuteAndLogUtilityCommandList(list_make3(DISABLE_DDL_PROPAGATION,
-												   dropCommand->data,
+												   dropCommand,
 												   ENABLE_DDL_PROPAGATION));
 	}
 }
 
 
+/*
+ * DropTableFromPublications drops the table from all of its publications.
+ */
+static void
+DropTableFromPublications(Oid relationId)
+{
+	bool isAdd = false;
+
+	List *alterPublicationCommands =
+		GetAlterPublicationDDLCommandsForTable(relationId, isAdd);
+
+	ExecuteAndLogUtilityCommandList(alterPublicationCommands);
+}
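The reshuffle above follows a common catalog-safety pattern: build the command list while the relation and its tuple descriptor are open, close the relation, and only then run DDL. A generic hedged template (BuildCommandForAttribute is a hypothetical helper, not part of this patch):

	List *commandList = NIL;

	Relation rel = relation_open(relationId, AccessShareLock);
	TupleDesc tupleDesc = RelationGetDescr(rel);

	for (int attrIndex = 0; attrIndex < tupleDesc->natts; attrIndex++)
	{
		/* only inspect attributes and queue commands; run no DDL here */
		commandList = lappend(commandList,
							  BuildCommandForAttribute(tupleDesc, attrIndex));
	}

	relation_close(rel, NoLock);

	/* the descriptor is no longer referenced, so executing DDL is safe now */
	char *command = NULL;
	foreach_ptr(command, commandList)
	{
		ExecuteAndLogUtilityCommandList(list_make3(DISABLE_DDL_PROPAGATION,
												   command,
												   ENABLE_DDL_PROPAGATION));
	}
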
@@ -94,6 +94,28 @@
 #include "utils/syscache.h"
 #include "utils/inval.h"
 
+
+/* common params that apply to all Citus table types */
+typedef struct
+{
+	char distributionMethod;
+	char replicationModel;
+} CitusTableParams;
+
+
+/*
+ * Params that only apply to distributed tables, i.e., the ones that are
+ * known as DISTRIBUTED_TABLE by Citus metadata.
+ */
+typedef struct
+{
+	int shardCount;
+	bool shardCountIsStrict;
+	char *colocateWithTableName;
+	char *distributionColumnName;
+} DistributedTableParams;
+
+
 /*
  * once every LOG_PER_TUPLE_AMOUNT, the copy will be logged.
  */
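To see how the two structs split responsibilities, here is a hedged construction sketch; CreateCitusTable and the CitusTableType values appear later in this diff, and the field values are made up.

	/* a distributed table supplies DistributedTableParams explicitly ... */
	DistributedTableParams params = {
		.shardCount = 32,
		.shardCountIsStrict = false,
		.colocateWithTableName = "default",
		.distributionColumnName = "tenant_id",
	};
	CreateCitusTable(relationId, HASH_DISTRIBUTED, &params);

	/*
	 * ... while a reference table passes NULL and has its CitusTableParams
	 * (DISTRIBUTE_BY_NONE plus 2PC replication) derived internally.
	 */
	CreateCitusTable(relationId, REFERENCE_TABLE, NULL);
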
@@ -106,17 +128,22 @@ static void CreateDistributedTableConcurrently(Oid relationId,
 											   char *colocateWithTableName,
 											   int shardCount,
 											   bool shardCountIsStrict);
-static char DecideReplicationModel(char distributionMethod, char *colocateWithTableName);
+static char DecideDistTableReplicationModel(char distributionMethod,
+											char *colocateWithTableName);
 static List * HashSplitPointsForShardList(List *shardList);
 static List * HashSplitPointsForShardCount(int shardCount);
 static List * WorkerNodesForShardList(List *shardList);
 static List * RoundRobinWorkerNodeList(List *workerNodeList, int listLength);
+static CitusTableParams DecideCitusTableParams(CitusTableType tableType,
+											   DistributedTableParams *
+											   distributedTableParams);
+static void CreateCitusTable(Oid relationId, CitusTableType tableType,
+							 DistributedTableParams *distributedTableParams);
 static void CreateHashDistributedTableShards(Oid relationId, int shardCount,
 											 Oid colocatedTableId, bool localTableEmpty);
-static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
-									  char distributionMethod, char replicationModel,
-									  int shardCount, bool shardCountIsStrict,
-									  char *colocateWithTableName);
+static uint32 ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
+									  DistributedTableParams *distributedTableParams,
+									  Var *distributionColumn);
 static void EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
 										   char distributionMethod, uint32 colocationId,
 										   char replicationModel);
@@ -377,7 +404,7 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName,
 
 	EnsureForeignKeysForDistributedTableConcurrently(relationId);
 
-	char replicationModel = DecideReplicationModel(distributionMethod,
-												   colocateWithTableName);
+	char replicationModel = DecideDistTableReplicationModel(distributionMethod,
+															colocateWithTableName);
 
 	/*
@@ -622,7 +649,7 @@ static void
 EnsureColocateWithTableIsValid(Oid relationId, char distributionMethod,
 							   char *distributionColumnName, char *colocateWithTableName)
 {
-	char replicationModel = DecideReplicationModel(distributionMethod,
-												   colocateWithTableName);
+	char replicationModel = DecideDistTableReplicationModel(distributionMethod,
+															colocateWithTableName);
 
 	/*
@@ -860,9 +887,6 @@ create_reference_table(PG_FUNCTION_ARGS)
 	CheckCitusVersion(ERROR);
 	Oid relationId = PG_GETARG_OID(0);
 
-	char *colocateWithTableName = NULL;
-	char *distributionColumnName = NULL;
-
 	EnsureCitusTableCanBeCreated(relationId);
 
 	/* enable create_reference_table on an empty node */
@@ -895,8 +919,7 @@ create_reference_table(PG_FUNCTION_ARGS)
 						  errdetail("There are no active worker nodes.")));
 	}
 
-	CreateDistributedTable(relationId, distributionColumnName, DISTRIBUTE_BY_NONE,
-						   ShardCount, false, colocateWithTableName);
+	CreateReferenceTable(relationId);
 
 	PG_RETURN_VOID();
 }
@@ -951,18 +974,90 @@ EnsureRelationExists(Oid relationId)
 
 
 /*
- * CreateDistributedTable creates distributed table in the given configuration.
+ * CreateDistributedTable is a wrapper around CreateCitusTable that creates a
+ * distributed table.
+ */
+void
+CreateDistributedTable(Oid relationId, char *distributionColumnName,
+					   char distributionMethod,
+					   int shardCount, bool shardCountIsStrict,
+					   char *colocateWithTableName)
+{
+	CitusTableType tableType;
+	switch (distributionMethod)
+	{
+		case DISTRIBUTE_BY_HASH:
+		{
+			tableType = HASH_DISTRIBUTED;
+			break;
+		}
+
+		case DISTRIBUTE_BY_APPEND:
+		{
+			tableType = APPEND_DISTRIBUTED;
+			break;
+		}
+
+		case DISTRIBUTE_BY_RANGE:
+		{
+			tableType = RANGE_DISTRIBUTED;
+			break;
+		}
+
+		default:
+		{
+			ereport(ERROR, (errmsg("unexpected distribution method when "
+								   "deciding Citus table type")));
+			break;
+		}
+	}
+
+	DistributedTableParams distributedTableParams = {
+		.colocateWithTableName = colocateWithTableName,
+		.shardCount = shardCount,
+		.shardCountIsStrict = shardCountIsStrict,
+		.distributionColumnName = distributionColumnName
+	};
+	CreateCitusTable(relationId, tableType, &distributedTableParams);
+}
+
+
+/*
+ * CreateReferenceTable is a wrapper around CreateCitusTable that creates a
+ * reference table.
+ */
+void
+CreateReferenceTable(Oid relationId)
+{
+	CreateCitusTable(relationId, REFERENCE_TABLE, NULL);
+}
+
+
+/*
+ * CreateCitusTable is the internal method that creates a Citus table in
+ * given configuration.
+ *
+ * DistributedTableParams should be non-null only if we're creating a distributed
+ * table.
+ *
  * This function contains all necessary logic to create distributed tables. It
  * performs necessary checks to ensure distributing the table is safe. If it is
  * safe to distribute the table, this function creates distributed table metadata,
  * creates shards and copies local data to shards. This function also handles
  * partitioned tables by distributing its partitions as well.
  */
-void
-CreateDistributedTable(Oid relationId, char *distributionColumnName,
-					   char distributionMethod, int shardCount,
-					   bool shardCountIsStrict, char *colocateWithTableName)
+static void
+CreateCitusTable(Oid relationId, CitusTableType tableType,
+				 DistributedTableParams *distributedTableParams)
 {
+	if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED ||
+		 tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL))
+	{
+		ereport(ERROR, (errmsg("distributed table params must be provided "
+							   "when creating a distributed table and must "
+							   "not be otherwise")));
+	}
+
 	/*
 	 * EnsureTableNotDistributed errors out when relation is a citus table but
 	 * we don't want to ask user to first undistribute their citus local tables
@@ -988,11 +1083,8 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
 	 * that ALTER TABLE hook does the necessary job, which means converting
 	 * local tables to citus local tables to properly support such foreign
 	 * keys.
-	 *
-	 * This function does not expect to create Citus local table, so we blindly
-	 * create reference table when the method is DISTRIBUTE_BY_NONE.
 	 */
-	else if (distributionMethod == DISTRIBUTE_BY_NONE &&
+	else if (tableType == REFERENCE_TABLE &&
 			 ShouldEnableLocalReferenceForeignKeys() &&
 			 HasForeignKeyWithLocalTable(relationId))
 	{
@@ -1022,24 +1114,29 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
 
 	PropagatePrerequisiteObjectsForDistributedTable(relationId);
 
-	char replicationModel = DecideReplicationModel(distributionMethod,
-												   colocateWithTableName);
-
-	Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId,
-																 distributionColumnName,
-																 NoLock);
+	Var *distributionColumn = NULL;
+	if (distributedTableParams)
+	{
+		distributionColumn = BuildDistributionKeyFromColumnName(relationId,
+																distributedTableParams->
+																distributionColumnName,
+																NoLock);
+	}
+
+	CitusTableParams citusTableParams = DecideCitusTableParams(tableType,
+															   distributedTableParams);
 
 	/*
 	 * ColocationIdForNewTable assumes caller acquires lock on relationId. In our case,
 	 * our caller already acquired lock on relationId.
 	 */
-	uint32 colocationId = ColocationIdForNewTable(relationId, distributionColumn,
-												  distributionMethod, replicationModel,
-												  shardCount, shardCountIsStrict,
-												  colocateWithTableName);
+	uint32 colocationId = ColocationIdForNewTable(relationId, tableType,
+												  distributedTableParams,
+												  distributionColumn);
 
-	EnsureRelationCanBeDistributed(relationId, distributionColumn, distributionMethod,
-								   colocationId, replicationModel);
+	EnsureRelationCanBeDistributed(relationId, distributionColumn,
+								   citusTableParams.distributionMethod,
+								   colocationId, citusTableParams.replicationModel);
 
 	/*
 	 * Make sure that existing reference tables have been replicated to all the nodes
|
||||||
bool autoConverted = false;
|
bool autoConverted = false;
|
||||||
|
|
||||||
/* create an entry for distributed table in pg_dist_partition */
|
/* create an entry for distributed table in pg_dist_partition */
|
||||||
InsertIntoPgDistPartition(relationId, distributionMethod, distributionColumn,
|
InsertIntoPgDistPartition(relationId, citusTableParams.distributionMethod,
|
||||||
colocationId, replicationModel, autoConverted);
|
distributionColumn,
|
||||||
|
colocationId, citusTableParams.replicationModel,
|
||||||
|
autoConverted);
|
||||||
|
|
||||||
/* foreign tables do not support TRUNCATE trigger */
|
/* foreign tables do not support TRUNCATE trigger */
|
||||||
if (RegularTable(relationId))
|
if (RegularTable(relationId))
|
||||||
|
@ -1078,17 +1177,14 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* create shards for hash distributed and reference tables */
|
/* create shards for hash distributed and reference tables */
|
||||||
if (distributionMethod == DISTRIBUTE_BY_HASH)
|
if (tableType == HASH_DISTRIBUTED)
|
||||||
{
|
{
|
||||||
CreateHashDistributedTableShards(relationId, shardCount, colocatedTableId,
|
CreateHashDistributedTableShards(relationId, distributedTableParams->shardCount,
|
||||||
|
colocatedTableId,
|
||||||
localTableEmpty);
|
localTableEmpty);
|
||||||
}
|
}
|
||||||
else if (distributionMethod == DISTRIBUTE_BY_NONE)
|
else if (tableType == REFERENCE_TABLE)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* This function does not expect to create Citus local table, so we blindly
|
|
||||||
* create reference table when the method is DISTRIBUTE_BY_NONE.
|
|
||||||
*/
|
|
||||||
CreateReferenceTableShard(relationId);
|
CreateReferenceTableShard(relationId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1116,17 +1212,36 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
|
||||||
char *relationName = get_rel_name(relationId);
|
char *relationName = get_rel_name(relationId);
|
||||||
char *parentRelationName = quote_qualified_identifier(schemaName, relationName);
|
char *parentRelationName = quote_qualified_identifier(schemaName, relationName);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* when there are many partitions, each call to CreateDistributedTable
|
||||||
|
* accumulates used memory. Create and free context for each call.
|
||||||
|
*/
|
||||||
|
MemoryContext citusPartitionContext =
|
||||||
|
AllocSetContextCreate(CurrentMemoryContext,
|
||||||
|
"citus_per_partition_context",
|
||||||
|
ALLOCSET_DEFAULT_SIZES);
|
||||||
|
MemoryContext oldContext = MemoryContextSwitchTo(citusPartitionContext);
|
||||||
|
|
||||||
foreach_oid(partitionRelationId, partitionList)
|
foreach_oid(partitionRelationId, partitionList)
|
||||||
{
|
{
|
||||||
CreateDistributedTable(partitionRelationId, distributionColumnName,
|
MemoryContextReset(citusPartitionContext);
|
||||||
distributionMethod, shardCount, false,
|
|
||||||
parentRelationName);
|
DistributedTableParams childDistributedTableParams = {
|
||||||
|
.colocateWithTableName = parentRelationName,
|
||||||
|
.shardCount = distributedTableParams->shardCount,
|
||||||
|
.shardCountIsStrict = false,
|
||||||
|
.distributionColumnName = distributedTableParams->distributionColumnName,
|
||||||
|
};
|
||||||
|
CreateCitusTable(partitionRelationId, tableType,
|
||||||
|
&childDistributedTableParams);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MemoryContextSwitchTo(oldContext);
|
||||||
|
MemoryContextDelete(citusPartitionContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* copy over data for hash distributed and reference tables */
|
/* copy over data for hash distributed and reference tables */
|
||||||
if (distributionMethod == DISTRIBUTE_BY_HASH ||
|
if (tableType == HASH_DISTRIBUTED || tableType == REFERENCE_TABLE)
|
||||||
distributionMethod == DISTRIBUTE_BY_NONE)
|
|
||||||
{
|
{
|
||||||
if (RegularTable(relationId))
|
if (RegularTable(relationId))
|
||||||
{
|
{
|
||||||
|
@@ -1145,6 +1260,70 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
 	}
 
 
+/*
+ * DecideCitusTableParams decides CitusTableParams based on given CitusTableType
+ * and DistributedTableParams if it's a distributed table.
+ *
+ * DistributedTableParams should be non-null only if CitusTableType corresponds
+ * to a distributed table.
+ */
+static CitusTableParams
+DecideCitusTableParams(CitusTableType tableType,
+					   DistributedTableParams *distributedTableParams)
+{
+	CitusTableParams citusTableParams = { 0 };
+	switch (tableType)
+	{
+		case HASH_DISTRIBUTED:
+		{
+			citusTableParams.distributionMethod = DISTRIBUTE_BY_HASH;
+			citusTableParams.replicationModel =
+				DecideDistTableReplicationModel(DISTRIBUTE_BY_HASH,
+												distributedTableParams->
+												colocateWithTableName);
+			break;
+		}
+
+		case APPEND_DISTRIBUTED:
+		{
+			citusTableParams.distributionMethod = DISTRIBUTE_BY_APPEND;
+			citusTableParams.replicationModel =
+				DecideDistTableReplicationModel(DISTRIBUTE_BY_APPEND,
+												distributedTableParams->
+												colocateWithTableName);
+			break;
+		}
+
+		case RANGE_DISTRIBUTED:
+		{
+			citusTableParams.distributionMethod = DISTRIBUTE_BY_RANGE;
+			citusTableParams.replicationModel =
+				DecideDistTableReplicationModel(DISTRIBUTE_BY_RANGE,
+												distributedTableParams->
+												colocateWithTableName);
+			break;
+		}
+
+		case REFERENCE_TABLE:
+		{
+			citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE;
+			citusTableParams.replicationModel = REPLICATION_MODEL_2PC;
+			break;
+		}
+
+		default:
+		{
+			ereport(ERROR, (errmsg("unexpected table type when deciding Citus "
+								   "table params")));
+			break;
+		}
+	}
+
+	return citusTableParams;
+}
+
+
 /*
  * PropagatePrerequisiteObjectsForDistributedTable ensures we can create shards
  * on all nodes by ensuring all dependent objects exist on all nodes.
@@ -1190,7 +1369,7 @@ EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId, Oid ownerRelationId
 	foreach_oid(citusTableId, citusTableIdList)
 	{
 		List *seqInfoList = NIL;
-		GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0);
+		GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, DEPENDENCY_AUTO);
 
 		SequenceInfo *seqInfo = NULL;
 		foreach_ptr(seqInfo, seqInfoList)
@@ -1267,7 +1446,7 @@ EnsureRelationHasCompatibleSequenceTypes(Oid relationId)
 {
 	List *seqInfoList = NIL;
 
-	GetDependentSequencesWithRelation(relationId, &seqInfoList, 0);
+	GetDependentSequencesWithRelation(relationId, &seqInfoList, 0, DEPENDENCY_AUTO);
 	EnsureDistributedSequencesHaveOneType(relationId, seqInfoList);
 }
 
@@ -1405,17 +1584,15 @@ DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
 
 
 /*
- * DecideReplicationModel function decides which replication model should be
- * used depending on given distribution configuration.
+ * DecideDistTableReplicationModel function decides which replication model should be
+ * used for a distributed table depending on given distribution configuration.
  */
 static char
-DecideReplicationModel(char distributionMethod, char *colocateWithTableName)
+DecideDistTableReplicationModel(char distributionMethod, char *colocateWithTableName)
 {
-	if (distributionMethod == DISTRIBUTE_BY_NONE)
-	{
-		return REPLICATION_MODEL_2PC;
-	}
-	else if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 &&
-			 !IsColocateWithNone(colocateWithTableName))
+	Assert(distributionMethod != DISTRIBUTE_BY_NONE);
+
+	if (!IsColocateWithDefault(colocateWithTableName) &&
+		!IsColocateWithNone(colocateWithTableName))
 	{
 		text *colocateWithTableNameText = cstring_to_text(colocateWithTableName);
@@ -1491,28 +1668,34 @@ CreateHashDistributedTableShards(Oid relationId, int shardCount,
 
 
 /*
- * ColocationIdForNewTable returns a colocation id for hash-distributed table
+ * ColocationIdForNewTable returns a colocation id for given table
  * according to given configuration. If there is no such configuration, it
  * creates one and returns the colocation id of the newly created colocation group.
+ * Note that DistributedTableParams and the distribution column Var should be
+ * non-null only if CitusTableType corresponds to a distributed table.
+ *
  * For append and range distributed tables, this function errors out if
  * colocateWithTableName parameter is not NULL, otherwise directly returns
  * INVALID_COLOCATION_ID.
  *
+ * For reference tables, returns the common reference table colocation id.
+ *
 * This function assumes its caller takes the necessary lock on relationId to
 * prevent possible changes on it.
 */
 static uint32
-ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
-						char distributionMethod, char replicationModel,
-						int shardCount, bool shardCountIsStrict,
-						char *colocateWithTableName)
+ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
+						DistributedTableParams *distributedTableParams,
+						Var *distributionColumn)
 {
+	CitusTableParams citusTableParams = DecideCitusTableParams(tableType,
+															   distributedTableParams);
+
 	uint32 colocationId = INVALID_COLOCATION_ID;
 
-	if (distributionMethod == DISTRIBUTE_BY_APPEND ||
-		distributionMethod == DISTRIBUTE_BY_RANGE)
+	if (tableType == APPEND_DISTRIBUTED || tableType == RANGE_DISTRIBUTED)
 	{
-		if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0)
+		if (!IsColocateWithDefault(distributedTableParams->colocateWithTableName))
 		{
 			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 							errmsg("cannot distribute relation"),
@@ -1522,7 +1705,7 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
 
 		return colocationId;
 	}
-	else if (distributionMethod == DISTRIBUTE_BY_NONE)
+	else if (tableType == REFERENCE_TABLE)
 	{
 		return CreateReferenceTableColocationId();
 	}
|
@ -1533,27 +1716,29 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,
|
||||||
* can be sure that there will no modifications on the colocation table
|
* can be sure that there will no modifications on the colocation table
|
||||||
* until this transaction is committed.
|
* until this transaction is committed.
|
||||||
*/
|
*/
|
||||||
Assert(distributionMethod == DISTRIBUTE_BY_HASH);
|
Assert(citusTableParams.distributionMethod == DISTRIBUTE_BY_HASH);
|
||||||
|
|
||||||
Oid distributionColumnType = distributionColumn->vartype;
|
Oid distributionColumnType = distributionColumn->vartype;
|
||||||
Oid distributionColumnCollation = get_typcollation(distributionColumnType);
|
Oid distributionColumnCollation = get_typcollation(distributionColumnType);
|
||||||
|
|
||||||
/* get an advisory lock to serialize concurrent default group creations */
|
/* get an advisory lock to serialize concurrent default group creations */
|
||||||
if (IsColocateWithDefault(colocateWithTableName))
|
if (IsColocateWithDefault(distributedTableParams->colocateWithTableName))
|
||||||
{
|
{
|
||||||
AcquireColocationDefaultLock();
|
AcquireColocationDefaultLock();
|
||||||
}
|
}
|
||||||
|
|
||||||
colocationId = FindColocateWithColocationId(relationId,
|
colocationId = FindColocateWithColocationId(relationId,
|
||||||
replicationModel,
|
citusTableParams.replicationModel,
|
||||||
distributionColumnType,
|
distributionColumnType,
|
||||||
distributionColumnCollation,
|
distributionColumnCollation,
|
||||||
shardCount,
|
distributedTableParams->shardCount,
|
||||||
|
distributedTableParams->
|
||||||
shardCountIsStrict,
|
shardCountIsStrict,
|
||||||
|
distributedTableParams->
|
||||||
colocateWithTableName);
|
colocateWithTableName);
|
||||||
|
|
||||||
if (IsColocateWithDefault(colocateWithTableName) && (colocationId !=
|
if (IsColocateWithDefault(distributedTableParams->colocateWithTableName) &&
|
||||||
INVALID_COLOCATION_ID))
|
(colocationId != INVALID_COLOCATION_ID))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* we can release advisory lock if there is already a default entry for given params;
|
* we can release advisory lock if there is already a default entry for given params;
|
||||||
|
@@ -1565,23 +1750,25 @@ ColocationIdForNewTable(Oid relationId, Var *distributionColumn,

 	if (colocationId == INVALID_COLOCATION_ID)
 	{
-		if (IsColocateWithDefault(colocateWithTableName))
+		if (IsColocateWithDefault(distributedTableParams->colocateWithTableName))
 		{
 			/*
 			 * Generate a new colocation ID and insert a pg_dist_colocation
 			 * record.
 			 */
-			colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor,
+			colocationId = CreateColocationGroup(distributedTableParams->shardCount,
+			                                     ShardReplicationFactor,
 			                                     distributionColumnType,
 			                                     distributionColumnCollation);
 		}
-		else if (IsColocateWithNone(colocateWithTableName))
+		else if (IsColocateWithNone(distributedTableParams->colocateWithTableName))
 		{
 			/*
 			 * Generate a new colocation ID and insert a pg_dist_colocation
 			 * record.
 			 */
-			colocationId = CreateColocationGroup(shardCount, ShardReplicationFactor,
+			colocationId = CreateColocationGroup(distributedTableParams->shardCount,
+			                                     ShardReplicationFactor,
 			                                     distributionColumnType,
 			                                     distributionColumnCollation);
 		}
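Condensed, the hash-distributed path above is lock-find-create: take the advisory lock only when colocating with "default", look up an existing colocation group, and create one on a miss. A standalone model of that control flow, with stubs standing in for the Citus catalog helpers:

    #include <stdio.h>
    #include <string.h>
    #include <stdbool.h>

    #define INVALID_COLOCATION_ID 0

    static bool IsColocateWithDefault(const char *name) { return strcmp(name, "default") == 0; }
    static unsigned FindColocateWithColocationId(const char *name) { (void) name; return INVALID_COLOCATION_ID; /* stub: lookup missed */ }
    static unsigned CreateColocationGroup(void) { return 42; /* stub: new pg_dist_colocation row */ }

    int main(void)
    {
        const char *colocateWith = "default";
        unsigned colocationId;

        /* the advisory lock is only taken for the shared "default" group */
        if (IsColocateWithDefault(colocateWith))
        {
            puts("AcquireColocationDefaultLock()");
        }
        colocationId = FindColocateWithColocationId(colocateWith);
        if (colocationId == INVALID_COLOCATION_ID)
        {
            colocationId = CreateColocationGroup();
        }
        printf("colocation id: %u\n", colocationId);
        return 0;
    }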
@@ -1608,6 +1795,8 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
 {
 	Oid parentRelationId = InvalidOid;

+	ErrorIfTableHasUnsupportedIdentityColumn(relationId);
+
 	EnsureLocalTableEmptyIfNecessary(relationId, distributionMethod);

 	/* user really wants triggers? */
@@ -2219,12 +2408,12 @@ CopyLocalDataIntoShards(Oid distributedRelationId)
 	EState *estate = CreateExecutorState();
 	ExprContext *econtext = GetPerTupleExprContext(estate);
 	econtext->ecxt_scantuple = slot;
-
+	const bool nonPublishableData = false;
 	DestReceiver *copyDest =
 		(DestReceiver *) CreateCitusCopyDestReceiver(distributedRelationId,
 		                                             columnNameList,
 		                                             partitionColumnIndex,
-		                                             estate, NULL);
+		                                             estate, NULL, nonPublishableData);

 	/* initialise state for writing to shards, we'll open connections on demand */
 	copyDest->rStartup(copyDest, 0, tupleDescriptor);
@@ -29,16 +29,14 @@
 #include "storage/lmgr.h"
 #include "utils/lsyscache.h"

-typedef bool (*AddressPredicate)(const ObjectAddress *);
-
 static void EnsureDependenciesCanBeDistributed(const ObjectAddress *relationAddress);
 static void ErrorIfCircularDependencyExists(const ObjectAddress *objectAddress);
 static int ObjectAddressComparator(const void *a, const void *b);
-static List * FilterObjectAddressListByPredicate(List *objectAddressList,
-                                                 AddressPredicate predicate);
 static void EnsureDependenciesExistOnAllNodes(const ObjectAddress *target);
 static List * GetDependencyCreateDDLCommands(const ObjectAddress *dependency);
 static bool ShouldPropagateObject(const ObjectAddress *address);
+static char * DropTableIfExistsCommand(Oid relationId);

 /*
  * EnsureDependenciesExistOnAllNodes finds all the dependencies that we support and makes
@@ -325,6 +323,21 @@ GetDistributableDependenciesForObject(const ObjectAddress *target)
 }


+/*
+ * DropTableIfExistsCommand returns command to drop given table if exists.
+ */
+static char *
+DropTableIfExistsCommand(Oid relationId)
+{
+	char *qualifiedRelationName = generate_qualified_relation_name(relationId);
+	StringInfo dropTableCommand = makeStringInfo();
+	appendStringInfo(dropTableCommand, "DROP TABLE IF EXISTS %s CASCADE",
+	                 qualifiedRelationName);
+
+	return dropTableCommand->data;
+}
+
+
 /*
  * GetDependencyCreateDDLCommands returns a list (potentially empty or NIL) of ddl
  * commands to execute on a worker to create the object.
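DropTableIfExistsCommand only assembles a string; for a hypothetical table public.users it yields DROP TABLE IF EXISTS public.users CASCADE. The same construction outside the server, with snprintf standing in for StringInfo:

    #include <stdio.h>

    int main(void)
    {
        /* generate_qualified_relation_name() result, assumed for the demo */
        const char *qualifiedRelationName = "public.users";
        char dropTableCommand[256];

        snprintf(dropTableCommand, sizeof(dropTableCommand),
                 "DROP TABLE IF EXISTS %s CASCADE", qualifiedRelationName);
        puts(dropTableCommand);  /* DROP TABLE IF EXISTS public.users CASCADE */
        return 0;
    }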
@@ -370,7 +383,7 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
 			bool creatingShellTableOnRemoteNode = true;
 			List *tableDDLCommands = GetFullTableCreationCommands(relationId,
 			                                                      WORKER_NEXTVAL_SEQUENCE_DEFAULTS,
-			                                                      INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS,
+			                                                      INCLUDE_IDENTITY,
 			                                                      creatingShellTableOnRemoteNode);
 			TableDDLCommand *tableDDLCommand = NULL;
 			foreach_ptr(tableDDLCommand, tableDDLCommands)
@@ -379,6 +392,10 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
 				commandList = lappend(commandList, GetTableDDLCommand(
 				                          tableDDLCommand));
 			}
+
+			/* we need to drop table, if exists, first to make table creation idempotent */
+			commandList = lcons(DropTableIfExistsCommand(relationId),
+			                    commandList);
 		}

 		return commandList;
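Because lcons prepends, the DROP lands ahead of the CREATE commands, so replaying the list on a worker that already has the shell table succeeds. A toy model of the resulting ordering (table name hypothetical):

    #include <stdio.h>

    int main(void)
    {
        /* command list as built above: CREATE commands first, then lcons()
         * pushes the DROP to the front */
        const char *commandList[] = {
            "DROP TABLE IF EXISTS public.users CASCADE",  /* prepended by lcons() */
            "CREATE TABLE public.users (...)",            /* shell table DDL */
        };

        for (int i = 0; i < 2; i++)
        {
            printf("%d: %s\n", i + 1, commandList[i]);
        }
        return 0;
    }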
@@ -438,6 +455,11 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
 			return DDLCommands;
 		}

+		case OCLASS_PUBLICATION:
+		{
+			return CreatePublicationDDLCommandsIdempotent(dependency);
+		}
+
 		case OCLASS_ROLE:
 		{
 			return GenerateCreateOrAlterRoleCommand(dependency->objectId);
@@ -527,68 +549,6 @@ GetAllDependencyCreateDDLCommands(const List *dependencies)
 }


-/*
- * ReplicateAllObjectsToNodeCommandList returns commands to replicate all
- * previously marked objects to a worker node. The function also sets
- * clusterHasDistributedFunction if there are any distributed functions.
- */
-List *
-ReplicateAllObjectsToNodeCommandList(const char *nodeName, int nodePort)
-{
-	/* since we are executing ddl commands disable propagation first, primarily for mx */
-	List *ddlCommands = list_make1(DISABLE_DDL_PROPAGATION);
-
-	/*
-	 * collect all dependencies in creation order and get their ddl commands
-	 */
-	List *dependencies = GetDistributedObjectAddressList();
-
-	/*
-	 * Depending on changes in the environment, such as the enable_metadata_sync guc
-	 * there might be objects in the distributed object address list that should currently
-	 * not be propagated by citus as they are 'not supported'.
-	 */
-	dependencies = FilterObjectAddressListByPredicate(dependencies,
-	                                                  &SupportedDependencyByCitus);
-
-	/*
-	 * When dependency lists are getting longer we see a delay in the creation time on the
-	 * workers. We would like to inform the user. Currently we warn for lists greater than
-	 * 100 items, where 100 is an arbitrarily chosen number. If we find it too high or too
-	 * low we can adjust this based on experience.
-	 */
-	if (list_length(dependencies) > 100)
-	{
-		ereport(NOTICE, (errmsg("Replicating postgres objects to node %s:%d", nodeName,
-		                        nodePort),
-		                 errdetail("There are %d objects to replicate, depending on your "
-		                           "environment this might take a while",
-		                           list_length(dependencies))));
-	}
-
-	dependencies = OrderObjectAddressListInDependencyOrder(dependencies);
-	ObjectAddress *dependency = NULL;
-	foreach_ptr(dependency, dependencies)
-	{
-		if (IsAnyObjectAddressOwnedByExtension(list_make1(dependency), NULL))
-		{
-			/*
-			 * we expect extension-owned objects to be created as a result
-			 * of the extension being created.
-			 */
-			continue;
-		}
-
-		ddlCommands = list_concat(ddlCommands,
-		                          GetDependencyCreateDDLCommands(dependency));
-	}
-
-	ddlCommands = lappend(ddlCommands, ENABLE_DDL_PROPAGATION);
-
-	return ddlCommands;
-}
-
-
 /*
  * ShouldPropagate determines if we should be propagating anything
  */
@@ -744,7 +704,7 @@ ShouldPropagateAnyObject(List *addresses)
  * FilterObjectAddressListByPredicate takes a list of ObjectAddress *'s and returns a list
  * only containing the ObjectAddress *'s for which the predicate returned true.
  */
-static List *
+List *
 FilterObjectAddressListByPredicate(List *objectAddressList, AddressPredicate predicate)
 {
 	List *result = NIL;
@@ -245,6 +245,15 @@ static DistributeObjectOps Any_CreatePolicy = {
 	.address = NULL,
 	.markDistributed = false,
 };
+static DistributeObjectOps Any_CreatePublication = {
+	.deparse = DeparseCreatePublicationStmt,
+	.qualify = QualifyCreatePublicationStmt,
+	.preprocess = NULL,
+	.postprocess = PostProcessCreatePublicationStmt,
+	.operationType = DIST_OPS_CREATE,
+	.address = CreatePublicationStmtObjectAddress,
+	.markDistributed = true,
+};
 static DistributeObjectOps Any_CreateRole = {
 	.deparse = DeparseCreateRoleStmt,
 	.qualify = NULL,
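Any_CreatePublication follows the established DistributeObjectOps pattern: one static struct of handlers per statement type, selected by GetDistributeObjectOps (see the new switch cases further down). A standalone model of that dispatch, reduced to two illustrative fields — everything below is a stub, not the real Citus types:

    #include <stdbool.h>
    #include <stdio.h>

    typedef enum { T_CreatePublicationStmt, T_CreateRoleStmt } NodeTag;

    typedef struct DistributeObjectOps
    {
        const char *deparse;   /* stands in for the deparser callback */
        bool markDistributed;  /* whether the object is recorded as distributed */
    } DistributeObjectOps;

    static DistributeObjectOps Any_CreatePublication = {
        "DeparseCreatePublicationStmt", true
    };

    static DistributeObjectOps *
    GetDistributeObjectOps(NodeTag tag)
    {
        switch (tag)
        {
            case T_CreatePublicationStmt:
            {
                return &Any_CreatePublication;
            }

            default:
            {
                return NULL;
            }
        }
    }

    int main(void)
    {
        DistributeObjectOps *ops = GetDistributeObjectOps(T_CreatePublicationStmt);
        printf("deparse via %s, markDistributed=%d\n", ops->deparse,
               ops->markDistributed);
        return 0;
    }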
@@ -707,6 +716,45 @@ static DistributeObjectOps Procedure_Rename = {
 	.address = RenameFunctionStmtObjectAddress,
 	.markDistributed = false,
 };
+static DistributeObjectOps Publication_Alter = {
+	.deparse = DeparseAlterPublicationStmt,
+	.qualify = QualifyAlterPublicationStmt,
+	.preprocess = PreprocessAlterPublicationStmt,
+	.postprocess = PostprocessAlterDistributedObjectStmt,
+	.objectType = OBJECT_PUBLICATION,
+	.operationType = DIST_OPS_ALTER,
+	.address = AlterPublicationStmtObjectAddress,
+	.markDistributed = false,
+};
+static DistributeObjectOps Publication_AlterOwner = {
+	.deparse = DeparseAlterPublicationOwnerStmt,
+	.qualify = NULL,
+	.preprocess = PreprocessAlterDistributedObjectStmt,
+	.postprocess = PostprocessAlterDistributedObjectStmt,
+	.objectType = OBJECT_PUBLICATION,
+	.operationType = DIST_OPS_ALTER,
+	.address = AlterPublicationOwnerStmtObjectAddress,
+	.markDistributed = false,
+};
+static DistributeObjectOps Publication_Drop = {
+	.deparse = DeparseDropPublicationStmt,
+	.qualify = NULL,
+	.preprocess = PreprocessDropDistributedObjectStmt,
+	.postprocess = NULL,
+	.operationType = DIST_OPS_DROP,
+	.address = NULL,
+	.markDistributed = false,
+};
+static DistributeObjectOps Publication_Rename = {
+	.deparse = DeparseRenamePublicationStmt,
+	.qualify = NULL,
+	.preprocess = PreprocessAlterDistributedObjectStmt,
+	.postprocess = NULL,
+	.objectType = OBJECT_PUBLICATION,
+	.operationType = DIST_OPS_ALTER,
+	.address = RenamePublicationStmtObjectAddress,
+	.markDistributed = false,
+};
 static DistributeObjectOps Routine_AlterObjectDepends = {
 	.deparse = DeparseAlterFunctionDependsStmt,
 	.qualify = QualifyAlterFunctionDependsStmt,
@@ -1399,6 +1447,11 @@ GetDistributeObjectOps(Node *node)
 					return &Procedure_AlterOwner;
 				}

+				case OBJECT_PUBLICATION:
+				{
+					return &Publication_AlterOwner;
+				}
+
 				case OBJECT_ROUTINE:
 				{
 					return &Routine_AlterOwner;
@@ -1436,6 +1489,11 @@ GetDistributeObjectOps(Node *node)
 			return &Any_AlterPolicy;
 		}

+		case T_AlterPublicationStmt:
+		{
+			return &Publication_Alter;
+		}
+
 		case T_AlterRoleStmt:
 		{
 			return &Any_AlterRole;
@@ -1610,6 +1668,11 @@ GetDistributeObjectOps(Node *node)
 			return &Any_CreatePolicy;
 		}

+		case T_CreatePublicationStmt:
+		{
+			return &Any_CreatePublication;
+		}
+
 		case T_CreateRoleStmt:
 		{
 			return &Any_CreateRole;
@@ -1722,6 +1785,11 @@ GetDistributeObjectOps(Node *node)
 					return &Procedure_Drop;
 				}

+				case OBJECT_PUBLICATION:
+				{
+					return &Publication_Drop;
+				}
+
 				case OBJECT_ROUTINE:
 				{
 					return &Routine_Drop;
@@ -1901,6 +1969,11 @@ GetDistributeObjectOps(Node *node)
 					return &Procedure_Rename;
 				}

+				case OBJECT_PUBLICATION:
+				{
+					return &Publication_Rename;
+				}
+
 				case OBJECT_ROUTINE:
 				{
 					return &Routine_Rename;
@@ -221,7 +221,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
 	if (!referencedIsCitus && !selfReferencingTable)
 	{
 		if (IsCitusLocalTableByDistParams(referencingDistMethod,
-		                                  referencingReplicationModel))
+		                                  referencingReplicationModel,
+		                                  referencingColocationId))
 		{
 			ErrorOutForFKeyBetweenPostgresAndCitusLocalTable(referencedTableId);
 		}
@@ -245,8 +246,7 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
 	if (!selfReferencingTable)
 	{
 		referencedDistMethod = PartitionMethod(referencedTableId);
-		referencedDistKey = IsCitusTableType(referencedTableId,
-		                                     CITUS_TABLE_WITH_NO_DIST_KEY) ?
+		referencedDistKey = !HasDistributionKey(referencedTableId) ?
 		                    NULL :
 		                    DistPartitionKey(referencedTableId);
 		referencedColocationId = TableColocationId(referencedTableId);
@@ -278,9 +278,17 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
 	}

 	bool referencingIsCitusLocalOrRefTable =
-		(referencingDistMethod == DISTRIBUTE_BY_NONE);
+		IsCitusLocalTableByDistParams(referencingDistMethod,
+		                              referencingReplicationModel,
+		                              referencingColocationId) ||
+		IsReferenceTableByDistParams(referencingDistMethod,
+		                             referencingReplicationModel);
 	bool referencedIsCitusLocalOrRefTable =
-		(referencedDistMethod == DISTRIBUTE_BY_NONE);
+		IsCitusLocalTableByDistParams(referencedDistMethod,
+		                              referencedReplicationModel,
+		                              referencedColocationId) ||
+		IsReferenceTableByDistParams(referencedDistMethod,
+		                             referencedReplicationModel);
 	if (referencingIsCitusLocalOrRefTable && referencedIsCitusLocalOrRefTable)
 	{
 		EnsureSupportedFKeyBetweenCitusLocalAndRefTable(constraintForm,
@@ -313,7 +321,8 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
 	 * reference table is referenced.
 	 */
 	bool referencedIsReferenceTable =
-		(referencedReplicationModel == REPLICATION_MODEL_2PC);
+		IsReferenceTableByDistParams(referencedDistMethod,
+		                             referencedReplicationModel);
 	if (!referencedIsReferenceTable && (
 	        referencingColocationId == INVALID_COLOCATION_ID ||
 	        referencingColocationId != referencedColocationId))
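The rewritten checks above replace raw pg_dist_partition comparisons with named predicates. A standalone model of what those predicates plausibly test — the none/2PC constants match the old inline checks in this diff, while the role of the new colocationId argument is an assumption:

    #include <stdbool.h>
    #include <stdio.h>

    #define DISTRIBUTE_BY_NONE 'n'
    #define REPLICATION_MODEL_2PC 't'
    #define INVALID_COLOCATION_ID 0

    /* reference tables: no distribution key, replicated with 2PC */
    static bool IsReferenceTableByDistParams(char distMethod, char replicationModel)
    {
        return distMethod == DISTRIBUTE_BY_NONE &&
               replicationModel == REPLICATION_MODEL_2PC;
    }

    /* citus local tables: no distribution key and not 2PC-replicated; the
     * colocationId condition is an assumption based on the added argument */
    static bool IsCitusLocalTableByDistParams(char distMethod, char replicationModel,
                                              unsigned colocationId)
    {
        return distMethod == DISTRIBUTE_BY_NONE &&
               replicationModel != REPLICATION_MODEL_2PC &&
               colocationId == INVALID_COLOCATION_ID;
    }

    int main(void)
    {
        printf("reference table: %d\n", IsReferenceTableByDistParams('n', 't'));
        printf("citus local table: %d\n", IsCitusLocalTableByDistParams('n', 's', 0));
        return 0;
    }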
@@ -1190,7 +1190,7 @@ ErrorIfUnsupportedIndexStmt(IndexStmt *createIndexStatement)
 	 * Non-distributed tables do not have partition key, and unique constraints
 	 * are allowed for them. Thus, we added a short-circuit for non-distributed tables.
 	 */
-	if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (!HasDistributionKey(relationId))
 	{
 		return;
 	}
@@ -36,6 +36,7 @@
 #include "distributed/local_multi_copy.h"
 #include "distributed/shard_utils.h"
 #include "distributed/version_compat.h"
+#include "distributed/replication_origin_session_utils.h"

 /* managed via GUC, default is 512 kB */
 int LocalCopyFlushThresholdByte = 512 * 1024;
@@ -46,7 +47,7 @@ static void AddSlotToBuffer(TupleTableSlot *slot, CitusCopyDestReceiver *copyDes
 static bool ShouldAddBinaryHeaders(StringInfo buffer, bool isBinary);
 static bool ShouldSendCopyNow(StringInfo buffer);
 static void DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId,
-                        CopyStmt *copyStatement, bool isEndOfCopy);
+                        CopyStmt *copyStatement, bool isEndOfCopy, bool isPublishable);
 static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead);
@@ -94,7 +95,7 @@ WriteTupleToLocalShard(TupleTableSlot *slot, CitusCopyDestReceiver *copyDest, in
 		bool isEndOfCopy = false;
 		DoLocalCopy(localCopyOutState->fe_msgbuf, copyDest->distributedRelationId,
 		            shardId,
-		            copyDest->copyStatement, isEndOfCopy);
+		            copyDest->copyStatement, isEndOfCopy, copyDest->isPublishable);
 		resetStringInfo(localCopyOutState->fe_msgbuf);
 	}
 }
@@ -133,7 +134,7 @@ FinishLocalCopyToShard(CitusCopyDestReceiver *copyDest, int64 shardId,
 	}
 	bool isEndOfCopy = true;
 	DoLocalCopy(localCopyOutState->fe_msgbuf, copyDest->distributedRelationId, shardId,
-	            copyDest->copyStatement, isEndOfCopy);
+	            copyDest->copyStatement, isEndOfCopy, copyDest->isPublishable);
 }
@@ -197,7 +198,7 @@ ShouldSendCopyNow(StringInfo buffer)
  */
 static void
 DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStatement,
-            bool isEndOfCopy)
+            bool isEndOfCopy, bool isPublishable)
 {
 	/*
 	 * Set the buffer as a global variable to allow ReadFromLocalBufferCallback
@@ -205,6 +206,10 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat
 	 * ReadFromLocalBufferCallback.
 	 */
 	LocalCopyBuffer = buffer;
+	if (!isPublishable)
+	{
+		SetupReplicationOriginLocalSession();
+	}

 	Oid shardOid = GetTableLocalShardOid(relationId, shardId);
 	Relation shard = table_open(shardOid, RowExclusiveLock);
@@ -219,6 +224,10 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat
 	EndCopyFrom(cstate);

 	table_close(shard, NoLock);
+	if (!isPublishable)
+	{
+		ResetReplicationOriginLocalSession();
+	}
 	free_parsestate(pState);
 }
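The new isPublishable flag brackets a non-publishable local copy with a replication-origin session, presumably so logical decoding can filter those rows out. A standalone model of the bracketing DoLocalCopy gains here (all bodies stubbed):

    #include <stdbool.h>
    #include <stdio.h>

    static void SetupReplicationOriginLocalSession(void) { puts("origin session: setup"); }
    static void ResetReplicationOriginLocalSession(void) { puts("origin session: reset"); }

    static void DoLocalCopy(bool isPublishable)
    {
        if (!isPublishable)
        {
            SetupReplicationOriginLocalSession();
        }

        puts("BeginCopyFrom/CopyFrom/EndCopyFrom on the local shard");

        if (!isPublishable)
        {
            ResetReplicationOriginLocalSession();
        }
    }

    int main(void)
    {
        DoLocalCopy(false);  /* the non-publishable case, for illustration */
        return 0;
    }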
@@ -85,6 +85,7 @@
 #include "distributed/relation_access_tracking.h"
 #include "distributed/remote_commands.h"
 #include "distributed/remote_transaction.h"
+#include "distributed/replication_origin_session_utils.h"
 #include "distributed/resource_lock.h"
 #include "distributed/shard_pruning.h"
 #include "distributed/shared_connection_stats.h"
@@ -270,7 +271,8 @@ static CopyConnectionState * GetConnectionState(HTAB *connectionStateHash,
 static CopyShardState * GetShardState(uint64 shardId, HTAB *shardStateHash,
                                       HTAB *connectionStateHash,
                                       bool *found, bool shouldUseLocalCopy, CopyOutState
-                                      copyOutState, bool isColocatedIntermediateResult);
+                                      copyOutState, bool isColocatedIntermediateResult,
+                                      bool isPublishable);
 static MultiConnection * CopyGetPlacementConnection(HTAB *connectionStateHash,
                                                     ShardPlacement *placement,
                                                     bool colocatedIntermediateResult);
@@ -285,7 +287,8 @@ static void InitializeCopyShardState(CopyShardState *shardState,
                                      uint64 shardId,
                                      bool canUseLocalCopy,
                                      CopyOutState copyOutState,
-                                     bool colocatedIntermediateResult);
+                                     bool colocatedIntermediateResult, bool
+                                     isPublishable);
 static void StartPlacementStateCopyCommand(CopyPlacementState *placementState,
                                            CopyStmt *copyStatement,
                                            CopyOutState copyOutState);
@@ -393,7 +396,7 @@ CitusCopyFrom(CopyStmt *copyStatement, QueryCompletion *completionTag)
 	if (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
 	    IsCitusTableTypeCacheEntry(cacheEntry, RANGE_DISTRIBUTED) ||
 	    IsCitusTableTypeCacheEntry(cacheEntry, APPEND_DISTRIBUTED) ||
-	    IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+	    !HasDistributionKeyCacheEntry(cacheEntry))
 	{
 		CopyToExistingShards(copyStatement, completionTag);
 	}
@@ -492,9 +495,11 @@ CopyToExistingShards(CopyStmt *copyStatement, QueryCompletion *completionTag)
 	ExprContext *executorExpressionContext = GetPerTupleExprContext(executorState);

 	/* set up the destination for the COPY */
+	const bool publishableData = true;
 	CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(tableId, columnNameList,
 	                                                              partitionColumnIndex,
-	                                                              executorState, NULL);
+	                                                              executorState, NULL,
+	                                                              publishableData);

 	/* if the user specified an explicit append-to_shard option, write to it */
 	uint64 appendShardId = ProcessAppendToShardOption(tableId, copyStatement);
@@ -1934,7 +1939,7 @@ CopyFlushOutput(CopyOutState cstate, char *start, char *pointer)
 CitusCopyDestReceiver *
 CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColumnIndex,
                             EState *executorState,
-                            char *intermediateResultIdPrefix)
+                            char *intermediateResultIdPrefix, bool isPublishable)
 {
 	CitusCopyDestReceiver *copyDest = (CitusCopyDestReceiver *) palloc0(
 		sizeof(CitusCopyDestReceiver));
@@ -1953,6 +1958,7 @@ CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColu
 	copyDest->executorState = executorState;
 	copyDest->colocatedIntermediateResultIdPrefix = intermediateResultIdPrefix;
 	copyDest->memoryContext = CurrentMemoryContext;
+	copyDest->isPublishable = isPublishable;

 	return copyDest;
 }
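From here the flag fans out to GetShardState, InitializeCopyShardState, and the shutdown path, as the hunks that follow show. A standalone trace of the remote-session plumbing, with stubs mirroring that call order:

    #include <stdbool.h>
    #include <stdio.h>

    static void SetupReplicationOriginRemoteSession(void) { puts("origin: setup"); }
    static void ResetReplicationOriginRemoteSession(void) { puts("origin: reset"); }

    /* mirrors InitializeCopyShardState() receiving the flag via GetShardState() */
    static void InitializeCopyShardState(bool isPublishable)
    {
        if (!isPublishable)
        {
            SetupReplicationOriginRemoteSession();
        }
    }

    /* mirrors ShutdownCopyConnectionState() undoing the session at the end */
    static void ShutdownCopyConnectionState(bool isPublishable)
    {
        if (!isPublishable)
        {
            ResetReplicationOriginRemoteSession();
        }
    }

    int main(void)
    {
        bool isPublishable = false;  /* the non-publishable case, for illustration */
        InitializeCopyShardState(isPublishable);
        puts("... COPY data to placements ...");
        ShutdownCopyConnectionState(isPublishable);
        return 0;
    }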
@@ -2318,7 +2324,9 @@ CitusSendTupleToPlacements(TupleTableSlot *slot, CitusCopyDestReceiver *copyDest
                                             &cachedShardStateFound,
                                             copyDest->shouldUseLocalCopy,
                                             copyDest->copyOutState,
-                                            isColocatedIntermediateResult);
+                                            isColocatedIntermediateResult,
+                                            copyDest->isPublishable);

 	if (!cachedShardStateFound)
 	{
 		firstTupleInShard = true;
@@ -2751,6 +2759,11 @@ ShutdownCopyConnectionState(CopyConnectionState *connectionState,
 	if (activePlacementState != NULL)
 	{
 		EndPlacementStateCopyCommand(activePlacementState, copyOutState);
+		if (!copyDest->isPublishable)
+		{
+			ResetReplicationOriginRemoteSession(
+				activePlacementState->connectionState->connection);
+		}
 	}

 	dlist_foreach(iter, &connectionState->bufferedPlacementList)
@@ -2764,6 +2777,10 @@ ShutdownCopyConnectionState(CopyConnectionState *connectionState,
 		SendCopyDataToPlacement(placementState->data, shardId,
 		                        connectionState->connection);
 		EndPlacementStateCopyCommand(placementState, copyOutState);
+		if (!copyDest->isPublishable)
+		{
+			ResetReplicationOriginRemoteSession(connectionState->connection);
+		}
 	}
 }
@@ -3436,7 +3453,7 @@ static CopyShardState *
 GetShardState(uint64 shardId, HTAB *shardStateHash,
               HTAB *connectionStateHash, bool *found, bool
               shouldUseLocalCopy, CopyOutState copyOutState,
-              bool isColocatedIntermediateResult)
+              bool isColocatedIntermediateResult, bool isPublishable)
 {
 	CopyShardState *shardState = (CopyShardState *) hash_search(shardStateHash, &shardId,
 	                                                            HASH_ENTER, found);
@@ -3444,7 +3461,8 @@ GetShardState(uint64 shardId, HTAB *shardStateHash,
 	{
 		InitializeCopyShardState(shardState, connectionStateHash,
 		                         shardId, shouldUseLocalCopy,
-		                         copyOutState, isColocatedIntermediateResult);
+		                         copyOutState, isColocatedIntermediateResult,
+		                         isPublishable);
 	}

 	return shardState;
@@ -3461,7 +3479,8 @@ InitializeCopyShardState(CopyShardState *shardState,
                          HTAB *connectionStateHash, uint64 shardId,
                          bool shouldUseLocalCopy,
                          CopyOutState copyOutState,
-                         bool colocatedIntermediateResult)
+                         bool colocatedIntermediateResult,
+                         bool isPublishable)
 {
 	ListCell *placementCell = NULL;
 	int failedPlacementCount = 0;
@@ -3532,6 +3551,11 @@ InitializeCopyShardState(CopyShardState *shardState,
 			RemoteTransactionBeginIfNecessary(connection);
 		}

+		if (!isPublishable)
+		{
+			SetupReplicationOriginRemoteSession(connection);
+		}
+
 		CopyPlacementState *placementState = palloc0(sizeof(CopyPlacementState));
 		placementState->shardState = shardState;
 		placementState->data = makeStringInfo();
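Worth noting how the two ends of the new flag meet: CopyToExistingShards, the regular COPY path, passes true for isPublishable, while CopyLocalDataIntoShards, which moves pre-existing local rows into freshly created shards, passes false. Only the former therefore remains visible to logical replication; the latter runs inside the replication-origin session set up above, presumably to keep internally generated writes out of publications.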
@@ -0,0 +1,634 @@
+/*-------------------------------------------------------------------------
+ *
+ * publication.c
+ *    Commands for creating publications
+ *
+ * Copyright (c) Citus Data, Inc.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "miscadmin.h"
+
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
+#include "distributed/commands.h"
+#include "distributed/deparser.h"
+#include "distributed/listutils.h"
+#include "distributed/metadata_utility.h"
+#include "distributed/metadata_sync.h"
+#include "distributed/metadata/distobject.h"
+#include "distributed/reference_table_utils.h"
+#include "distributed/worker_create_or_replace.h"
+#include "nodes/makefuncs.h"
+#include "nodes/parsenodes.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+#include "pg_version_compat.h"
+
+
+static CreatePublicationStmt * BuildCreatePublicationStmt(Oid publicationId);
+#if (PG_VERSION_NUM >= PG_VERSION_15)
+static PublicationObjSpec * BuildPublicationRelationObjSpec(Oid relationId,
+                                                            Oid publicationId,
+                                                            bool tableOnly);
+#endif
+static void AppendPublishOptionList(StringInfo str, List *strings);
+static char * AlterPublicationOwnerCommand(Oid publicationId);
+static bool ShouldPropagateCreatePublication(CreatePublicationStmt *stmt);
+static List * ObjectAddressForPublicationName(char *publicationName, bool missingOk);
+
+
+/*
+ * PostProcessCreatePublicationStmt handles CREATE PUBLICATION statements
+ * that contain distributed tables.
+ */
+List *
+PostProcessCreatePublicationStmt(Node *node, const char *queryString)
+{
+	CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
+
+	if (!ShouldPropagateCreatePublication(stmt))
+	{
+		/* should not propagate right now */
+		return NIL;
+	}
+
+	/* call into CreatePublicationStmtObjectAddress */
+	List *publicationAddresses = GetObjectAddressListFromParseTree(node, false, true);
+
+	/* the code-path only supports a single object */
+	Assert(list_length(publicationAddresses) == 1);
+
+	if (IsAnyObjectAddressOwnedByExtension(publicationAddresses, NULL))
+	{
+		/* should not propagate publications owned by extensions */
+		return NIL;
+	}
+
+	EnsureAllObjectDependenciesExistOnAllNodes(publicationAddresses);
+
+	const ObjectAddress *pubAddress = linitial(publicationAddresses);
+
+	List *commands = NIL;
+	commands = lappend(commands, DISABLE_DDL_PROPAGATION);
+	commands = lappend(commands, CreatePublicationDDLCommand(pubAddress->objectId));
+	commands = lappend(commands, ENABLE_DDL_PROPAGATION);
+
+	return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
+}
+
+
+/*
+ * CreatePublicationDDLCommandsIdempotent returns a list of DDL statements to be
+ * executed on a node to recreate the publication addressed by the publicationAddress.
+ */
+List *
+CreatePublicationDDLCommandsIdempotent(const ObjectAddress *publicationAddress)
+{
+	Assert(publicationAddress->classId == PublicationRelationId);
+
+	char *ddlCommand =
+		CreatePublicationDDLCommand(publicationAddress->objectId);
+
+	char *alterPublicationOwnerSQL =
+		AlterPublicationOwnerCommand(publicationAddress->objectId);
+
+	return list_make2(
+		WrapCreateOrReplace(ddlCommand),
+		alterPublicationOwnerSQL);
+}
+
+
+/*
+ * CreatePublicationDDLCommand returns the CREATE PUBLICATION string that
+ * can be used to recreate a given publication.
+ */
+char *
+CreatePublicationDDLCommand(Oid publicationId)
+{
+	CreatePublicationStmt *createPubStmt = BuildCreatePublicationStmt(publicationId);
+
+	/* we took the WHERE clause from the catalog where it is already transformed */
+	bool whereClauseRequiresTransform = false;
+
+	/* only propagate Citus tables in publication */
+	bool includeLocalTables = false;
+
+	return DeparseCreatePublicationStmtExtended((Node *) createPubStmt,
+	                                            whereClauseRequiresTransform,
+	                                            includeLocalTables);
+}
+
+
+/*
+ * BuildCreatePublicationStmt constructs a CreatePublicationStmt struct for the
+ * given publication.
+ */
+static CreatePublicationStmt *
+BuildCreatePublicationStmt(Oid publicationId)
+{
+	CreatePublicationStmt *createPubStmt = makeNode(CreatePublicationStmt);
+
+	HeapTuple publicationTuple =
+		SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(publicationId));
+
+	if (!HeapTupleIsValid(publicationTuple))
+	{
+		ereport(ERROR, (errmsg("cannot find publication with oid: %d", publicationId)));
+	}
+
+	Form_pg_publication publicationForm =
+		(Form_pg_publication) GETSTRUCT(publicationTuple);
+
+	/* CREATE PUBLICATION <name> */
+	createPubStmt->pubname = pstrdup(NameStr(publicationForm->pubname));
+
+	/* FOR ALL TABLES */
+	createPubStmt->for_all_tables = publicationForm->puballtables;
+
+	ReleaseSysCache(publicationTuple);
+
+#if (PG_VERSION_NUM >= PG_VERSION_15)
+	List *schemaIds = GetPublicationSchemas(publicationId);
+	Oid schemaId = InvalidOid;
+
+	foreach_oid(schemaId, schemaIds)
+	{
+		char *schemaName = get_namespace_name(schemaId);
+
+		PublicationObjSpec *publicationObject = makeNode(PublicationObjSpec);
+		publicationObject->pubobjtype = PUBLICATIONOBJ_TABLES_IN_SCHEMA;
+		publicationObject->pubtable = NULL;
+		publicationObject->name = schemaName;
+		publicationObject->location = -1;
+
+		createPubStmt->pubobjects = lappend(createPubStmt->pubobjects, publicationObject);
+	}
+#endif
+
+	List *relationIds = GetPublicationRelations(publicationId,
+	                                            publicationForm->pubviaroot ?
+	                                            PUBLICATION_PART_ROOT :
+	                                            PUBLICATION_PART_LEAF);
+	Oid relationId = InvalidOid;
+	int citusTableCount PG_USED_FOR_ASSERTS_ONLY = 0;
+
+	/* mainly for consistent ordering in test output */
+	relationIds = SortList(relationIds, CompareOids);
+
+	foreach_oid(relationId, relationIds)
+	{
+#if (PG_VERSION_NUM >= PG_VERSION_15)
+		bool tableOnly = false;
+
+		/* since postgres 15, tables can have a column list and filter */
+		PublicationObjSpec *publicationObject =
+			BuildPublicationRelationObjSpec(relationId, publicationId, tableOnly);
+
+		createPubStmt->pubobjects = lappend(createPubStmt->pubobjects, publicationObject);
+#else
+
+		/* before postgres 15, only full tables are supported */
+		char *schemaName = get_namespace_name(get_rel_namespace(relationId));
+		char *tableName = get_rel_name(relationId);
+		RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
+
+		createPubStmt->tables = lappend(createPubStmt->tables, rangeVar);
+#endif
+
+		if (IsCitusTable(relationId))
+		{
+			citusTableCount++;
+		}
+	}
+
+	/* WITH (publish_via_partition_root = true) option */
+	bool publishViaRoot = publicationForm->pubviaroot;
+	char *publishViaRootString = publishViaRoot ? "true" : "false";
+	DefElem *pubViaRootOption = makeDefElem("publish_via_partition_root",
+	                                        (Node *) makeString(publishViaRootString),
+	                                        -1);
+	createPubStmt->options = lappend(createPubStmt->options, pubViaRootOption);
+
+	/* WITH (publish = 'insert, update, delete, truncate') option */
+	List *publishList = NIL;
+
+	if (publicationForm->pubinsert)
+	{
+		publishList = lappend(publishList, makeString("insert"));
+	}
+
+	if (publicationForm->pubupdate)
+	{
+		publishList = lappend(publishList, makeString("update"));
+	}
+
+	if (publicationForm->pubdelete)
+	{
+		publishList = lappend(publishList, makeString("delete"));
+	}
+
+	if (publicationForm->pubtruncate)
+	{
+		publishList = lappend(publishList, makeString("truncate"));
+	}
+
+	if (list_length(publishList) > 0)
+	{
+		StringInfo optionValue = makeStringInfo();
+		AppendPublishOptionList(optionValue, publishList);
+
+		DefElem *publishOption = makeDefElem("publish",
+		                                     (Node *) makeString(optionValue->data), -1);
+		createPubStmt->options = lappend(createPubStmt->options, publishOption);
+	}
+
+
+	return createPubStmt;
+}
+
+
+/*
+ * AppendPublishOptionList appends a list of publication options in
+ * comma-separated form.
+ */
+static void
+AppendPublishOptionList(StringInfo str, List *options)
+{
+	ListCell *stringCell = NULL;
+	foreach(stringCell, options)
+	{
+		const char *string = strVal(lfirst(stringCell));
+		if (stringCell != list_head(options))
+		{
+			appendStringInfoString(str, ", ");
+		}
+
+		/* we cannot escape these strings */
+		appendStringInfoString(str, string);
+	}
+}
+
+
+#if (PG_VERSION_NUM >= PG_VERSION_15)
+
+/*
+ * BuildPublicationRelationObjSpec returns a PublicationObjSpec that
+ * can be included in a CREATE or ALTER PUBLICATION statement.
+ */
+static PublicationObjSpec *
+BuildPublicationRelationObjSpec(Oid relationId, Oid publicationId,
+                                bool tableOnly)
+{
+	HeapTuple pubRelationTuple = SearchSysCache2(PUBLICATIONRELMAP,
+	                                             ObjectIdGetDatum(relationId),
+	                                             ObjectIdGetDatum(publicationId));
+	if (!HeapTupleIsValid(pubRelationTuple))
+	{
+		ereport(ERROR, (errmsg("cannot find relation with oid %d in publication "
+		                       "with oid %d", relationId, publicationId)));
+	}
+
+	List *columnNameList = NIL;
+	Node *whereClause = NULL;
+
+	/* build the column list */
+	if (!tableOnly)
+	{
+		bool isNull = false;
+		Datum attributesDatum = SysCacheGetAttr(PUBLICATIONRELMAP, pubRelationTuple,
+		                                        Anum_pg_publication_rel_prattrs,
+		                                        &isNull);
+		if (!isNull)
+		{
+			ArrayType *attributesArray = DatumGetArrayTypeP(attributesDatum);
+			int attributeCount = ARR_DIMS(attributesArray)[0];
+			int16 *elems = (int16 *) ARR_DATA_PTR(attributesArray);
+
+			for (int attNumIndex = 0; attNumIndex < attributeCount; attNumIndex++)
+			{
+				AttrNumber attributeNumber = elems[attNumIndex];
+				char *columnName = get_attname(relationId, attributeNumber, false);
+
+				columnNameList = lappend(columnNameList, makeString(columnName));
+			}
+		}
+
+		/* build the WHERE clause */
+		Datum whereClauseDatum = SysCacheGetAttr(PUBLICATIONRELMAP, pubRelationTuple,
+		                                         Anum_pg_publication_rel_prqual,
+		                                         &isNull);
+		if (!isNull)
+		{
+			/*
+			 * We use the already-transformed parse tree form here, which does
+			 * not match regular CreatePublicationStmt
+			 */
+			whereClause = stringToNode(TextDatumGetCString(whereClauseDatum));
+		}
+	}
+
+	ReleaseSysCache(pubRelationTuple);
+
+	char *schemaName = get_namespace_name(get_rel_namespace(relationId));
+	char *tableName = get_rel_name(relationId);
+	RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
+
+	/* build the FOR TABLE */
+	PublicationTable *publicationTable =
+		makeNode(PublicationTable);
+	publicationTable->relation = rangeVar;
+	publicationTable->whereClause = whereClause;
+	publicationTable->columns = columnNameList;
+
+	PublicationObjSpec *publicationObject = makeNode(PublicationObjSpec);
+	publicationObject->pubobjtype = PUBLICATIONOBJ_TABLE;
+	publicationObject->pubtable = publicationTable;
+	publicationObject->name = NULL;
+	publicationObject->location = -1;
+
+	return publicationObject;
+}
+
+
+#endif
+
+
+/*
+ * PreprocessAlterPublicationStmt handles ALTER PUBLICATION statements
+ * in a way that is mostly similar to PreprocessAlterDistributedObjectStmt,
+ * except we do not ensure sequential mode (publications do not interact with
+ * shards) and can handle NULL deparse commands for ALTER PUBLICATION commands
+ * that only involve local tables.
+ */
+List *
+PreprocessAlterPublicationStmt(Node *stmt, const char *queryString,
+                               ProcessUtilityContext processUtilityContext)
+{
+	List *addresses = GetObjectAddressListFromParseTree(stmt, false, false);
+
+	/* the code-path only supports a single object */
+	Assert(list_length(addresses) == 1);
+
+	if (!ShouldPropagateAnyObject(addresses))
+	{
+		return NIL;
+	}
+
+	EnsureCoordinator();
+	QualifyTreeNode(stmt);
+
+	const char *sql = DeparseTreeNode((Node *) stmt);
+	if (sql == NULL)
+	{
+		/*
+		 * Deparsing logic decided that there is nothing to propagate, e.g.
+		 * because the command only concerns local tables.
+		 */
+		return NIL;
+	}
+
+	List *commands = list_make3(DISABLE_DDL_PROPAGATION,
+	                            (void *) sql,
+	                            ENABLE_DDL_PROPAGATION);
+
+	return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
+}
+
+
+/*
+ * GetAlterPublicationDDLCommandsForTable gets a list of ALTER PUBLICATION .. ADD/DROP
+ * commands for the given table.
+ *
+ * If isAdd is true, it returns ALTER PUBLICATION .. ADD TABLE commands for all
+ * publications.
+ *
+ * Otherwise, it returns ALTER PUBLICATION .. DROP TABLE commands for all
+ * publications.
+ */
+List *
+GetAlterPublicationDDLCommandsForTable(Oid relationId, bool isAdd)
+{
+	List *commands = NIL;
+
+	List *publicationIds = GetRelationPublications(relationId);
+	Oid publicationId = InvalidOid;
+
+	foreach_oid(publicationId, publicationIds)
+	{
+		char *command = GetAlterPublicationTableDDLCommand(publicationId,
+		                                                   relationId, isAdd);
+
+		commands = lappend(commands, command);
+	}
+
+	return commands;
+}
+
+
+/*
+ * GetAlterPublicationTableDDLCommand generates an ALTER PUBLICATION .. ADD/DROP TABLE
+ * command for the given publication and relation ID.
+ *
+ * If isAdd is true, it returns an ALTER PUBLICATION .. ADD TABLE command.
+ * Otherwise, it returns ALTER PUBLICATION .. DROP TABLE command.
+ */
+char *
+GetAlterPublicationTableDDLCommand(Oid publicationId, Oid relationId,
+                                   bool isAdd)
+{
+	HeapTuple pubTuple = SearchSysCache1(PUBLICATIONOID,
+	                                     ObjectIdGetDatum(publicationId));
+	if (!HeapTupleIsValid(pubTuple))
+	{
+		ereport(ERROR, (errmsg("cannot find publication with oid: %d",
+		                       publicationId)));
+	}
+
+	Form_pg_publication pubForm = (Form_pg_publication) GETSTRUCT(pubTuple);
+
+	AlterPublicationStmt *alterPubStmt = makeNode(AlterPublicationStmt);
+	alterPubStmt->pubname = pstrdup(NameStr(pubForm->pubname));
+
+	ReleaseSysCache(pubTuple);
+
+#if (PG_VERSION_NUM >= PG_VERSION_15)
+	bool tableOnly = !isAdd;
+
+	/* since postgres 15, tables can have a column list and filter */
+	PublicationObjSpec *publicationObject =
+		BuildPublicationRelationObjSpec(relationId, publicationId, tableOnly);
+
+	alterPubStmt->pubobjects = lappend(alterPubStmt->pubobjects, publicationObject);
+	alterPubStmt->action = isAdd ? AP_AddObjects : AP_DropObjects;
+#else
+
+	/* before postgres 15, only full tables are supported */
+	char *schemaName = get_namespace_name(get_rel_namespace(relationId));
+	char *tableName = get_rel_name(relationId);
+	RangeVar *rangeVar = makeRangeVar(schemaName, tableName, -1);
+
+	alterPubStmt->tables = lappend(alterPubStmt->tables, rangeVar);
+	alterPubStmt->tableAction = isAdd ? DEFELEM_ADD : DEFELEM_DROP;
+#endif
+
+	/* we take the WHERE clause from the catalog where it is already transformed */
+	bool whereClauseNeedsTransform = false;
+
+	/*
+	 * We use these commands to restore publications before/after transforming a
+	 * table, including transformations to/from local tables.
+	 */
+	bool includeLocalTables = true;
+
+	char *command = DeparseAlterPublicationStmtExtended((Node *) alterPubStmt,
+	                                                    whereClauseNeedsTransform,
+	                                                    includeLocalTables);
+
+	return command;
+}
+
+
+/*
+ * AlterPublicationOwnerCommand returns "ALTER PUBLICATION .. OWNER TO .."
+ * statement for the specified publication.
+ */
+static char *
+AlterPublicationOwnerCommand(Oid publicationId)
+{
+	HeapTuple publicationTuple =
+		SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(publicationId));
+
+	if (!HeapTupleIsValid(publicationTuple))
+	{
+		ereport(ERROR, (errmsg("cannot find publication with oid: %d",
+		                       publicationId)));
+	}
+
+	Form_pg_publication publicationForm =
+		(Form_pg_publication) GETSTRUCT(publicationTuple);
+
+	char *publicationName = NameStr(publicationForm->pubname);
+	Oid publicationOwnerId = publicationForm->pubowner;
+
+	char *publicationOwnerName = GetUserNameFromId(publicationOwnerId, false);
+
+	StringInfo alterCommand = makeStringInfo();
+	appendStringInfo(alterCommand, "ALTER PUBLICATION %s OWNER TO %s",
+	                 quote_identifier(publicationName),
+	                 quote_identifier(publicationOwnerName));
+
+	ReleaseSysCache(publicationTuple);
+
+	return alterCommand->data;
+}
+
+
+/*
+ * ShouldPropagateCreatePublication tests if we need to propagate a CREATE PUBLICATION
+ * statement.
+ */
+static bool
+ShouldPropagateCreatePublication(CreatePublicationStmt *stmt)
+{
+	if (!ShouldPropagate())
+	{
+		return false;
+	}
+
+	if (!ShouldPropagateCreateInCoordinatedTransction())
+	{
+		return false;
+	}
+
+	return true;
+}
+
+
+/*
+ * AlterPublicationStmtObjectAddress generates the object address for the
+ * publication altered by a regular ALTER PUBLICATION .. statement.
+ */
+List *
+AlterPublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
+{
+	AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
+
+	return ObjectAddressForPublicationName(stmt->pubname, missingOk);
+}
+
+
+/*
+ * AlterPublicationOwnerStmtObjectAddress generates the object address for the
+ * publication altered by the given ALTER PUBLICATION .. OWNER TO statement.
+ */
+List *
+AlterPublicationOwnerStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
+{
+	AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
+
+	return ObjectAddressForPublicationName(strVal(stmt->object), missingOk);
+}
+
+
+/*
+ * CreatePublicationStmtObjectAddress generates the object address for the
+ * publication created by the given CREATE PUBLICATION statement.
+ */
+List *
+CreatePublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostProcess)
+{
+	CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);
+
+	return ObjectAddressForPublicationName(stmt->pubname, missingOk);
+}
+
+
+/*
+ * RenamePublicationStmtObjectAddress generates the object address for the
+ * publication altered by the given ALTER PUBLICATION .. RENAME TO statement.
+ */
+List *
+RenamePublicationStmtObjectAddress(Node *node, bool missingOk, bool isPostprocess)
+{
+	RenameStmt *stmt = castNode(RenameStmt, node);
+
+	return ObjectAddressForPublicationName(strVal(stmt->object), missingOk);
+}
+
+
+/*
+ * ObjectAddressForPublicationName returns the object address for a given publication
+ * name.
+ */
+static List *
+ObjectAddressForPublicationName(char *publicationName, bool missingOk)
+{
+	Oid publicationId = InvalidOid;
+
+	HeapTuple publicationTuple =
+		SearchSysCache1(PUBLICATIONNAME, CStringGetDatum(publicationName));
+	if (HeapTupleIsValid(publicationTuple))
+	{
+		Form_pg_publication publicationForm =
+			(Form_pg_publication) GETSTRUCT(publicationTuple);
+		publicationId = publicationForm->oid;
|
||||||
|
|
||||||
|
ReleaseSysCache(publicationTuple);
|
||||||
|
}
|
||||||
|
else if (!missingOk)
|
||||||
|
{
|
||||||
|
/* it should have just been created */
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT),
|
||||||
|
errmsg("publication \"%s\" does not exist", publicationName)));
|
||||||
|
}
|
||||||
|
|
||||||
|
ObjectAddress *address = palloc0(sizeof(ObjectAddress));
|
||||||
|
ObjectAddressSet(*address, PublicationRelationId, publicationId);
|
||||||
|
|
||||||
|
return list_make1(address);
|
||||||
|
}
|
|
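
A minimal usage sketch of the helper above, assuming valid publication and relation
OIDs (the variable names are hypothetical, not taken from the diff): the function
deparses back to plain DDL such as "ALTER PUBLICATION pub ADD TABLE public.users".

    /* hypothetical caller: deparse add/drop commands for one table */
    char *addCommand = GetAlterPublicationTableDDLCommand(publicationId,
                                                          relationId, true);
    char *dropCommand = GetAlterPublicationTableDDLCommand(publicationId,
                                                           relationId, false);
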
@ -33,7 +33,8 @@
 /* Local functions forward declarations for helper functions */
 static bool OptionsSpecifyOwnedBy(List *optionList, Oid *ownedByTableId);
-static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress);
+static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char
+                                          depType);
 static List * FilterDistributedSequences(GrantStmt *stmt);

@ -183,7 +184,7 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList,
     char *columnName = NameStr(attributeForm->attname);
     List *columnOwnedSequences =
-        getOwnedSequences_internal(relationId, attributeIndex + 1, 0);
+        getOwnedSequences_internal(relationId, attributeIndex + 1, DEPENDENCY_AUTO);

     if (attributeForm->atthasdef && list_length(columnOwnedSequences) == 0)
     {

@ -453,21 +454,22 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString,
     /* the code-path only supports a single object */
     Assert(list_length(addresses) == 1);

+    /* We have already asserted that we have exactly 1 address in the addresses. */
+    ObjectAddress *address = linitial(addresses);
+
     /* error out if the sequence is distributed */
-    if (IsAnyObjectDistributed(addresses))
+    if (IsAnyObjectDistributed(addresses) || SequenceUsedInDistributedTable(address,
+                                                                            DEPENDENCY_INTERNAL))
     {
         ereport(ERROR, (errmsg(
                     "Altering a distributed sequence is currently not supported.")));
     }

-    /* We have already asserted that we have exactly 1 address in the addresses. */
-    ObjectAddress *address = linitial(addresses);
-
     /*
      * error out if the sequence is used in a distributed table
      * and this is an ALTER SEQUENCE .. AS .. statement
      */
-    Oid citusTableId = SequenceUsedInDistributedTable(address);
+    Oid citusTableId = SequenceUsedInDistributedTable(address, DEPENDENCY_AUTO);
     if (citusTableId != InvalidOid)
     {
         List *options = stmt->options;

@ -497,16 +499,19 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString,
  * SequenceUsedInDistributedTable returns true if the argument sequence
  * is used as the default value of a column in a distributed table.
  * Returns false otherwise
+ * See DependencyType for the possible values of depType.
+ * We use DEPENDENCY_INTERNAL for sequences created by identity column.
+ * DEPENDENCY_AUTO for regular sequences.
  */
 static Oid
-SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress)
+SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char depType)
 {
     List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE);
     Oid citusTableId = InvalidOid;
     foreach_oid(citusTableId, citusTableIdList)
     {
         List *seqInfoList = NIL;
-        GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0);
+        GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, depType);
         SequenceInfo *seqInfo = NULL;
         foreach_ptr(seqInfo, seqInfoList)
         {
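
The new depType argument distinguishes the two ways a sequence can hang off a
table: plain column defaults create a DEPENDENCY_AUTO edge in pg_depend, while
identity columns create a DEPENDENCY_INTERNAL edge. A sketch of the two call
shapes (the address variable is assumed to hold a valid sequence ObjectAddress):

    /* sequence backing a DEFAULT nextval(...) column */
    Oid viaDefault = SequenceUsedInDistributedTable(address, DEPENDENCY_AUTO);

    /* sequence created implicitly by GENERATED ... AS IDENTITY */
    Oid viaIdentity = SequenceUsedInDistributedTable(address, DEPENDENCY_INTERNAL);
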
@ -75,7 +75,7 @@ static void DistributePartitionUsingParent(Oid parentRelationId,
 static void ErrorIfMultiLevelPartitioning(Oid parentRelationId, Oid partitionRelationId);
 static void ErrorIfAttachCitusTableToPgLocalTable(Oid parentRelationId,
                                                   Oid partitionRelationId);
-static bool AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(
+static bool ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(
     AlterTableStmt *alterTableStatement);
 static bool ShouldMarkConnectedRelationsNotAutoConverted(Oid leftRelationId,
                                                          Oid rightRelationId);

@ -1119,7 +1119,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,

     if (ShouldEnableLocalReferenceForeignKeys() &&
         processUtilityContext != PROCESS_UTILITY_SUBCOMMAND &&
-        AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(alterTableStatement))
+        ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(alterTableStatement))
     {
         /*
          * We don't process subcommands generated by postgres.

@ -1378,29 +1378,6 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
         }
     }
-
-        /*
-         * We check for ADD COLUMN .. GENERATED .. AS IDENTITY expr
-         * since it uses a sequence as an internal dependency
-         * we should deparse the statement
-         */
-        constraint = NULL;
-        foreach_ptr(constraint, columnConstraints)
-        {
-            if (constraint->contype == CONSTR_IDENTITY)
-            {
-                deparseAT = true;
-                useInitialDDLCommandString = false;
-
-                /*
-                 * Since we don't support constraints for AT_AddColumn
-                 * we have to set is_not_null to true explicitly for identity columns
-                 */
-                ColumnDef *newColDef = copyObject(columnDefinition);
-                newColDef->constraints = NULL;
-                newColDef->is_not_null = true;
-                newCmd->def = (Node *) newColDef;
-            }
-        }
-
     /*
      * We check for ADD COLUMN .. SERIAL pseudo-type

@ -1584,12 +1561,12 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,


 /*
- * AlterTableDefinesFKeyBetweenPostgresAndNonDistTable returns true if given
+ * ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef returns true if given
  * alter table command defines foreign key between a postgres table and a
  * reference or citus local table.
  */
 static bool
-AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableStatement)
+ATDefinesFKeyBetweenPostgresAndCitusLocalOrRef(AlterTableStmt *alterTableStatement)
 {
     List *foreignKeyConstraintList =
         GetAlterTableAddFKeyConstraintList(alterTableStatement);

@ -1607,9 +1584,12 @@ AlterTableDefinesFKeyBetweenPostgresAndNonDistTable(AlterTableStmt *alterTableSt
     if (!IsCitusTable(leftRelationId))
     {
         return RelationIdListContainsCitusTableType(rightRelationIdList,
-                                                    CITUS_TABLE_WITH_NO_DIST_KEY);
+                                                    CITUS_LOCAL_TABLE) ||
+               RelationIdListContainsCitusTableType(rightRelationIdList,
+                                                    REFERENCE_TABLE);
     }
-    else if (IsCitusTableType(leftRelationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+    else if (IsCitusTableType(leftRelationId, CITUS_LOCAL_TABLE) ||
+             IsCitusTableType(leftRelationId, REFERENCE_TABLE))
     {
         return RelationIdListContainsPostgresTable(rightRelationIdList);
     }

@ -2539,34 +2519,6 @@ PostprocessAlterTableStmt(AlterTableStmt *alterTableStatement)
             }
         }
     }
-
-    /*
-     * We check for ADD COLUMN .. GENERATED AS IDENTITY expr
-     * since it uses a seqeunce as an internal dependency
-     */
-    constraint = NULL;
-    foreach_ptr(constraint, columnConstraints)
-    {
-        if (constraint->contype == CONSTR_IDENTITY)
-        {
-            AttrNumber attnum = get_attnum(relationId,
-                                           columnDefinition->colname);
-            bool missing_ok = false;
-            Oid seqOid = getIdentitySequence(relationId, attnum, missing_ok);
-
-            if (ShouldSyncTableMetadata(relationId))
-            {
-                needMetadataSyncForNewSequences = true;
-                alterTableDefaultNextvalCmd =
-                    GetAddColumnWithNextvalDefaultCmd(seqOid,
-                                                      relationId,
-                                                      columnDefinition->colname,
-                                                      columnDefinition->typeName);
-            }
-        }
-    }
     /*
      * We check for ALTER COLUMN .. SET DEFAULT nextval('user_defined_seq')

@ -3222,6 +3174,17 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
     {
         if (columnConstraint->contype == CONSTR_IDENTITY)
         {
+            /*
+             * We currently don't support adding an identity column for an MX table
+             */
+            if (ShouldSyncTableMetadata(relationId))
+            {
+                ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg(
+                                    "cannot execute ADD COLUMN commands involving identity"
+                                    " columns when metadata is synchronized to workers")));
+            }
+
             /*
              * Currently we don't support backfilling the new identity column with default values
              * if the table is not empty

@ -3352,7 +3315,8 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
              */
             AttrNumber attnum = get_attnum(relationId, command->name);
             List *seqInfoList = NIL;
-            GetDependentSequencesWithRelation(relationId, &seqInfoList, attnum);
+            GetDependentSequencesWithRelation(relationId, &seqInfoList, attnum,
+                                              DEPENDENCY_AUTO);
             if (seqInfoList != NIL)
             {
                 ereport(ERROR, (errmsg("cannot execute ALTER COLUMN TYPE .. command "

@ -3666,7 +3630,7 @@ SetupExecutionModeForAlterTable(Oid relationId, AlterTableCmd *command)
      * sequential mode.
      */
     if (executeSequentially &&
-        !IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) &&
+        HasDistributionKey(relationId) &&
         ParallelQueryExecutedInTransaction())
     {
         char *relationName = get_rel_name(relationId);

@ -4011,3 +3975,59 @@ MakeNameListFromRangeVar(const RangeVar *rel)
         return list_make1(makeString(rel->relname));
     }
 }
+
+
+/*
+ * ErrorIfTableHasUnsupportedIdentityColumn errors out if the given table has any
+ * identity column other than a bigint identity column.
+ */
+void
+ErrorIfTableHasUnsupportedIdentityColumn(Oid relationId)
+{
+    Relation relation = relation_open(relationId, AccessShareLock);
+    TupleDesc tupleDescriptor = RelationGetDescr(relation);
+
+    for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
+         attributeIndex++)
+    {
+        Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
+
+        if (attributeForm->attidentity && attributeForm->atttypid != INT8OID)
+        {
+            char *qualifiedRelationName = generate_qualified_relation_name(relationId);
+            ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                            errmsg(
+                                "cannot complete operation on %s with smallint/int identity column",
+                                qualifiedRelationName),
+                            errhint(
+                                "Use bigint identity column instead.")));
+        }
+    }
+
+    relation_close(relation, NoLock);
+}
+
+
+/*
+ * ErrorIfTableHasIdentityColumn errors out if the given table has an identity column.
+ */
+void
+ErrorIfTableHasIdentityColumn(Oid relationId)
+{
+    Relation relation = relation_open(relationId, AccessShareLock);
+    TupleDesc tupleDescriptor = RelationGetDescr(relation);
+
+    for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
+         attributeIndex++)
+    {
+        Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
+
+        if (attributeForm->attidentity)
+        {
+            ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                            errmsg(
+                                "cannot complete operation on a table with identity column")));
+        }
+    }
+
+    relation_close(relation, NoLock);
+}
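
The two guards added at the end of the hunk are intended to be called before an
operation that cannot handle identity columns; a hedged sketch of a caller (the
surrounding context is hypothetical):

    /* reject smallint/int identity columns, allow bigint ones */
    ErrorIfTableHasUnsupportedIdentityColumn(relationId);

    /* or, where identity columns are not supported at all */
    ErrorIfTableHasIdentityColumn(relationId);
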
@ -324,7 +324,7 @@ ExecuteTruncateStmtSequentialIfNecessary(TruncateStmt *command)
     {
         Oid relationId = RangeVarGetRelid(rangeVar, NoLock, failOK);

-        if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY) &&
+        if (IsCitusTable(relationId) && !HasDistributionKey(relationId) &&
             TableReferenced(relationId))
         {
             char *relationName = get_rel_name(relationId);
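
The rewritten condition spells out what the removed CITUS_TABLE_WITH_NO_DIST_KEY
check used to bundle; as a sketch, the replacement predicate pair matches exactly
the Citus tables that have no distribution column (reference and citus local tables):

    bool noDistKey = IsCitusTable(relationId) && !HasDistributionKey(relationId);
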
@ -53,6 +53,7 @@
 #include "distributed/coordinator_protocol.h"
 #include "distributed/deparser.h"
 #include "distributed/deparse_shard_query.h"
+#include "distributed/executor_util.h"
 #include "distributed/foreign_key_relationship.h"
 #include "distributed/listutils.h"
 #include "distributed/local_executor.h"
@ -1202,6 +1202,17 @@ FinishConnectionEstablishment(MultiConnection *connection)
 }


+/*
+ * ForceConnectionCloseAtTransactionEnd marks connection to be closed at the end of the
+ * transaction.
+ */
+void
+ForceConnectionCloseAtTransactionEnd(MultiConnection *connection)
+{
+    connection->forceCloseAtTransactionEnd = true;
+}
+
+
 /*
  * ClaimConnectionExclusively signals that this connection is actively being
  * used. That means it'll not be, again, returned by

@ -1484,6 +1495,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit)
  * - Current cached connections is already at MaxCachedConnectionsPerWorker
  * - Connection is forced to close at the end of transaction
  * - Connection is not in OK state
+ * - Connection has a replication origin setup
  * - A transaction is still in progress (usually because we are cancelling a distributed transaction)
  * - A connection reached its maximum lifetime
  */

@ -1503,6 +1515,7 @@ ShouldShutdownConnection(MultiConnection *connection, const int cachedConnection
            PQstatus(connection->pgConn) != CONNECTION_OK ||
            !RemoteTransactionIdle(connection) ||
            connection->requiresReplication ||
+           connection->isReplicationOriginSessionSetup ||
            (MaxCachedConnectionLifetime >= 0 &&
             MillisecondsToTimeout(connection->connectionEstablishmentStart,
                                   MaxCachedConnectionLifetime) <= 0);
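
A sketch of how the new helper is meant to be used (the connection variable is
assumed): once session-level state such as a replication origin has been set up,
returning the connection to the cache would leak that state, so the caller marks
it for closure instead.

    /* do not let this connection be cached past the current transaction */
    ForceConnectionCloseAtTransactionEnd(connection);
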
@ -573,6 +573,47 @@ SendRemoteCommand(MultiConnection *connection, const char *command)
 }


+/*
+ * ExecuteRemoteCommandAndCheckResult executes the given command in the remote node and
+ * checks if the result is equal to the expected result. If the result is equal to the
+ * expected result, the function returns true, otherwise it returns false.
+ */
+bool
+ExecuteRemoteCommandAndCheckResult(MultiConnection *connection, char *command,
+                                   char *expected)
+{
+    if (!SendRemoteCommand(connection, command))
+    {
+        /* if we cannot connect, we warn and report false */
+        ReportConnectionError(connection, WARNING);
+        return false;
+    }
+    bool raiseInterrupts = true;
+    PGresult *queryResult = GetRemoteCommandResult(connection, raiseInterrupts);
+
+    /* if remote node throws an error, we also throw an error */
+    if (!IsResponseOK(queryResult))
+    {
+        ReportResultError(connection, queryResult, ERROR);
+    }
+
+    StringInfo queryResultString = makeStringInfo();
+
+    /* Evaluate the queryResult and store it into the queryResultString */
+    bool success = EvaluateSingleQueryResult(connection, queryResult, queryResultString);
+    bool result = false;
+    if (success && strcmp(queryResultString->data, expected) == 0)
+    {
+        result = true;
+    }
+
+    PQclear(queryResult);
+    ForgetResults(connection);
+
+    return result;
+}
+
+
 /*
  * ReadFirstColumnAsText reads the first column of result tuples from the given
  * PGresult struct and returns them in a StringInfo list.
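
A usage sketch with a hypothetical query/expected pair: the helper compares the
single-value result in its text form, so a boolean probe on a worker looks like
this ("f" being the text rendering of false).

    if (ExecuteRemoteCommandAndCheckResult(connection,
                                           "SELECT pg_is_in_recovery()", "f"))
    {
        /* the remote node is a primary */
    }
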
@ -304,10 +304,7 @@ pg_get_sequencedef(Oid sequenceRelationId)
  * When it's WORKER_NEXTVAL_SEQUENCE_DEFAULTS, the function creates the DEFAULT
  * clause using worker_nextval('sequence') and not nextval('sequence')
  * When IncludeIdentities is NO_IDENTITY, the function does not include identity column
- * specifications. When it's INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, the function
- * uses sequences and set them as default values for identity columns by using exactly
- * the same approach with worker_nextval('sequence') & nextval('sequence') logic
- * desribed above. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses.
+ * specifications. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTITY clauses.
  */
 char *
 pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults

@ -403,26 +400,9 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
             Oid seqOid = getIdentitySequence(RelationGetRelid(relation),
                                              attributeForm->attnum, missing_ok);
-            char *sequenceName = generate_qualified_relation_name(seqOid);
-
-            if (includeIdentityDefaults == INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS)
-            {
-                if (pg_get_sequencedef(seqOid)->seqtypid != INT8OID)
-                {
-                    appendStringInfo(&buffer,
-                                     " DEFAULT worker_nextval(%s::regclass)",
-                                     quote_literal_cstr(sequenceName));
-                }
-                else
-                {
-                    appendStringInfo(&buffer, " DEFAULT nextval(%s::regclass)",
-                                     quote_literal_cstr(sequenceName));
-                }
-            }
-            else if (includeIdentityDefaults == INCLUDE_IDENTITY)
+            if (includeIdentityDefaults == INCLUDE_IDENTITY)
             {
                 Form_pg_sequence pgSequenceForm = pg_get_sequencedef(seqOid);
-                uint64 sequenceStart = nextval_internal(seqOid, false);
                 char *sequenceDef = psprintf(
                     " GENERATED %s AS IDENTITY (INCREMENT BY " INT64_FORMAT \
                     " MINVALUE " INT64_FORMAT " MAXVALUE "

@ -433,7 +413,8 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
                     "ALWAYS" : "BY DEFAULT",
                     pgSequenceForm->seqincrement,
                     pgSequenceForm->seqmin,
-                    pgSequenceForm->seqmax, sequenceStart,
+                    pgSequenceForm->seqmax,
+                    pgSequenceForm->seqstart,
                     pgSequenceForm->seqcache,
                     pgSequenceForm->seqcycle ? "" : "NO ");

@ -1391,7 +1372,7 @@ convert_aclright_to_string(int aclright)

 /*
  * contain_nextval_expression_walker walks over expression tree and returns
- * true if it contains call to 'nextval' function.
+ * true if it contains call to 'nextval' function or it has an identity column.
  */
 bool
 contain_nextval_expression_walker(Node *node, void *context)

@ -1401,6 +1382,13 @@ contain_nextval_expression_walker(Node *node, void *context)
         return false;
     }

+    /* check if the node contains an identity column */
+    if (IsA(node, NextValueExpr))
+    {
+        return true;
+    }
+
+    /* check if the node contains call to 'nextval' */
     if (IsA(node, FuncExpr))
     {
         FuncExpr *funcExpr = (FuncExpr *) node;
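
A sketch of driving the extended walker over a column default (the expression
pointer is assumed): it now reports true both for explicit nextval('...') calls
and for the NextValueExpr node that identity columns produce.

    /* true for DEFAULT nextval(...) and for identity columns alike */
    bool usesSequence = contain_nextval_expression_walker(defaultExpr, NULL);
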
@ -0,0 +1,690 @@
/*-------------------------------------------------------------------------
 *
 * deparse_publication_stmts.c
 *    All routines to deparse publication statements.
 *
 * Copyright (c) Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/relation.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/deparser.h"
#include "distributed/listutils.h"
#include "distributed/namespace_utils.h"
#include "lib/stringinfo.h"
#include "parser/parse_clause.h"
#include "parser/parse_collate.h"
#include "parser/parse_node.h"
#include "parser/parse_relation.h"
#include "nodes/value.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/ruleutils.h"


static void AppendCreatePublicationStmt(StringInfo buf, CreatePublicationStmt *stmt,
                                        bool whereClauseNeedsTransform,
                                        bool includeLocalTables);
#if (PG_VERSION_NUM >= PG_VERSION_15)
static bool AppendPublicationObjects(StringInfo buf, List *publicationObjects,
                                     bool whereClauseNeedsTransform,
                                     bool includeLocalTables);
static void AppendWhereClauseExpression(StringInfo buf, RangeVar *tableName,
                                        Node *whereClause,
                                        bool whereClauseNeedsTransform);
static void AppendAlterPublicationAction(StringInfo buf, AlterPublicationAction action);
#else
static bool AppendTables(StringInfo buf, List *tables, bool includeLocalTables);
static void AppendDefElemAction(StringInfo buf, DefElemAction action);
#endif
static bool AppendAlterPublicationStmt(StringInfo buf, AlterPublicationStmt *stmt,
                                       bool whereClauseNeedsTransform,
                                       bool includeLocalTables);
static void AppendDropPublicationStmt(StringInfo buf, DropStmt *stmt);
static void AppendRenamePublicationStmt(StringInfo buf, RenameStmt *stmt);
static void AppendAlterPublicationOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt);
static void AppendPublicationOptions(StringInfo stringBuffer, List *optionList);
static void AppendIdentifierList(StringInfo buf, List *objects);


/*
 * DeparseCreatePublicationStmt builds and returns a string representing a
 * CreatePublicationStmt.
 */
char *
DeparseCreatePublicationStmt(Node *node)
{
    /* regular deparsing function takes CREATE PUBLICATION from the parser */
    bool whereClauseNeedsTransform = false;

    /* for regular CREATE PUBLICATION we do not propagate local tables */
    bool includeLocalTables = false;

    return DeparseCreatePublicationStmtExtended(node, whereClauseNeedsTransform,
                                                includeLocalTables);
}


/*
 * DeparseCreatePublicationStmtExtended builds and returns a string representing a
 * CreatePublicationStmt, which may have already-transformed expressions.
 */
char *
DeparseCreatePublicationStmtExtended(Node *node, bool whereClauseNeedsTransform,
                                     bool includeLocalTables)
{
    CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);

    StringInfoData str = { 0 };
    initStringInfo(&str);

    AppendCreatePublicationStmt(&str, stmt, whereClauseNeedsTransform,
                                includeLocalTables);

    return str.data;
}


/*
 * AppendCreatePublicationStmt appends a string representing a
 * CreatePublicationStmt to a buffer.
 */
static void
AppendCreatePublicationStmt(StringInfo buf, CreatePublicationStmt *stmt,
                            bool whereClauseNeedsTransform,
                            bool includeLocalTables)
{
    appendStringInfo(buf, "CREATE PUBLICATION %s",
                     quote_identifier(stmt->pubname));

    if (stmt->for_all_tables)
    {
        appendStringInfoString(buf, " FOR ALL TABLES");
    }
#if (PG_VERSION_NUM >= PG_VERSION_15)
    else if (stmt->pubobjects != NIL)
    {
        bool hasObjects = false;
        PublicationObjSpec *publicationObject = NULL;

        /*
         * Check whether there are objects to propagate, mainly to know whether
         * we should include "FOR".
         */
        foreach_ptr(publicationObject, stmt->pubobjects)
        {
            if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
            {
                /* FOR TABLE ... */
                PublicationTable *publicationTable = publicationObject->pubtable;

                if (includeLocalTables ||
                    IsCitusTableRangeVar(publicationTable->relation, NoLock, false))
                {
                    hasObjects = true;
                    break;
                }
            }
            else
            {
                hasObjects = true;
                break;
            }
        }

        if (hasObjects)
        {
            appendStringInfoString(buf, " FOR");
            AppendPublicationObjects(buf, stmt->pubobjects, whereClauseNeedsTransform,
                                     includeLocalTables);
        }
    }
#else
    else if (stmt->tables != NIL)
    {
        bool hasTables = false;
        RangeVar *rangeVar = NULL;

        /*
         * Check whether there are tables to propagate, mainly to know whether
         * we should include "FOR".
         */
        foreach_ptr(rangeVar, stmt->tables)
        {
            if (includeLocalTables || IsCitusTableRangeVar(rangeVar, NoLock, false))
            {
                hasTables = true;
                break;
            }
        }

        if (hasTables)
        {
            appendStringInfoString(buf, " FOR");
            AppendTables(buf, stmt->tables, includeLocalTables);
        }
    }
#endif

    if (stmt->options != NIL)
    {
        appendStringInfoString(buf, " WITH (");
        AppendPublicationOptions(buf, stmt->options);
        appendStringInfoString(buf, ")");
    }
}


#if (PG_VERSION_NUM >= PG_VERSION_15)

/*
 * AppendPublicationObjects appends a string representing a list of publication
 * objects to a buffer.
 *
 * For instance: TABLE users, departments, TABLES IN SCHEMA production
 */
static bool
AppendPublicationObjects(StringInfo buf, List *publicationObjects,
                         bool whereClauseNeedsTransform,
                         bool includeLocalTables)
{
    PublicationObjSpec *publicationObject = NULL;
    bool appendedObject = false;

    foreach_ptr(publicationObject, publicationObjects)
    {
        if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
        {
            /* FOR TABLE ... */
            PublicationTable *publicationTable = publicationObject->pubtable;
            RangeVar *rangeVar = publicationTable->relation;
            char *schemaName = rangeVar->schemaname;
            char *tableName = rangeVar->relname;

            if (!includeLocalTables && !IsCitusTableRangeVar(rangeVar, NoLock, false))
            {
                /* do not propagate local tables */
                continue;
            }

            if (schemaName != NULL)
            {
                /* qualified table name */
                appendStringInfo(buf, "%s TABLE %s",
                                 appendedObject ? "," : "",
                                 quote_qualified_identifier(schemaName, tableName));
            }
            else
            {
                /* unqualified table name */
                appendStringInfo(buf, "%s TABLE %s",
                                 appendedObject ? "," : "",
                                 quote_identifier(tableName));
            }

            if (publicationTable->columns != NIL)
            {
                appendStringInfoString(buf, " (");
                AppendIdentifierList(buf, publicationTable->columns);
                appendStringInfoString(buf, ")");
            }

            if (publicationTable->whereClause != NULL)
            {
                appendStringInfoString(buf, " WHERE (");

                AppendWhereClauseExpression(buf, rangeVar,
                                            publicationTable->whereClause,
                                            whereClauseNeedsTransform);

                appendStringInfoString(buf, ")");
            }
        }
        else
        {
            /* FOR TABLES IN SCHEMA */
            char *schemaName = publicationObject->name;

            if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLES_IN_CUR_SCHEMA)
            {
                List *searchPath = fetch_search_path(false);
                if (searchPath == NIL)
                {
                    ereport(ERROR, errcode(ERRCODE_UNDEFINED_SCHEMA),
                            errmsg("no schema has been selected for "
                                   "CURRENT_SCHEMA"));
                }

                schemaName = get_namespace_name(linitial_oid(searchPath));
            }

            appendStringInfo(buf, "%s TABLES IN SCHEMA %s",
                             appendedObject ? "," : "",
                             quote_identifier(schemaName));
        }

        appendedObject = true;
    }

    return appendedObject;
}


/*
 * AppendWhereClauseExpression appends a deparsed expression that can
 * contain a filter on the given table. If whereClauseNeedsTransform is set
 * the expression is first transformed.
 */
static void
AppendWhereClauseExpression(StringInfo buf, RangeVar *tableName,
                            Node *whereClause, bool whereClauseNeedsTransform)
{
    Relation relation = relation_openrv(tableName, AccessShareLock);

    if (whereClauseNeedsTransform)
    {
        ParseState *pstate = make_parsestate(NULL);
        pstate->p_sourcetext = "";
        ParseNamespaceItem *nsitem = addRangeTableEntryForRelation(pstate,
                                                                   relation,
                                                                   AccessShareLock, NULL,
                                                                   false, false);
        addNSItemToQuery(pstate, nsitem, false, true, true);

        whereClause = transformWhereClause(pstate,
                                           copyObject(whereClause),
                                           EXPR_KIND_WHERE,
                                           "PUBLICATION WHERE");

        assign_expr_collations(pstate, whereClause);
    }

    List *relationContext = deparse_context_for(tableName->relname, relation->rd_id);

    PushOverrideEmptySearchPath(CurrentMemoryContext);
    char *whereClauseString = deparse_expression(whereClause,
                                                 relationContext,
                                                 true, true);
    PopOverrideSearchPath();

    appendStringInfoString(buf, whereClauseString);

    relation_close(relation, AccessShareLock);
}


#else

/*
 * AppendTables appends a string representing a list of tables to a buffer.
 *
 * For instance: TABLE users, departments
 */
static bool
AppendTables(StringInfo buf, List *tables, bool includeLocalTables)
{
    RangeVar *rangeVar = NULL;
    bool appendedObject = false;

    foreach_ptr(rangeVar, tables)
    {
        if (!includeLocalTables &&
            !IsCitusTableRangeVar(rangeVar, NoLock, false))
        {
            /* do not propagate local tables */
            continue;
        }

        char *schemaName = rangeVar->schemaname;
        char *tableName = rangeVar->relname;

        if (schemaName != NULL)
        {
            /* qualified table name */
            appendStringInfo(buf, "%s %s",
                             appendedObject ? "," : " TABLE",
                             quote_qualified_identifier(schemaName, tableName));
        }
        else
        {
            /* unqualified table name */
            appendStringInfo(buf, "%s %s",
                             appendedObject ? "," : " TABLE",
                             quote_identifier(tableName));
        }

        appendedObject = true;
    }

    return appendedObject;
}


#endif


/*
 * DeparseAlterPublicationStmt builds and returns a string representing
 * an AlterPublicationStmt.
 */
char *
DeparseAlterPublicationStmt(Node *node)
{
    /* regular deparsing function takes ALTER PUBLICATION from the parser */
    bool whereClauseNeedsTransform = true;

    /* for regular ALTER PUBLICATION we do not propagate local tables */
    bool includeLocalTables = false;

    return DeparseAlterPublicationStmtExtended(node, whereClauseNeedsTransform,
                                               includeLocalTables);
}


/*
 * DeparseAlterPublicationStmtExtended builds and returns a string representing an
 * AlterPublicationStmt, which may have already-transformed expressions.
 */
char *
DeparseAlterPublicationStmtExtended(Node *node, bool whereClauseNeedsTransform,
                                    bool includeLocalTables)
{
    AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);
    StringInfoData str = { 0 };
    initStringInfo(&str);

    if (!AppendAlterPublicationStmt(&str, stmt, whereClauseNeedsTransform,
                                    includeLocalTables))
    {
        Assert(!includeLocalTables);

        /*
         * When there are no objects to propagate, then there is no
         * valid ALTER PUBLICATION to construct.
         */
        return NULL;
    }

    return str.data;
}


/*
 * AppendAlterPublicationStmt appends a string representing an AlterPublicationStmt
 * of the form ALTER PUBLICATION .. ADD/SET/DROP
 */
static bool
AppendAlterPublicationStmt(StringInfo buf, AlterPublicationStmt *stmt,
                           bool whereClauseNeedsTransform,
                           bool includeLocalTables)
{
    appendStringInfo(buf, "ALTER PUBLICATION %s",
                     quote_identifier(stmt->pubname));

    if (stmt->options)
    {
        appendStringInfoString(buf, " SET (");
        AppendPublicationOptions(buf, stmt->options);
        appendStringInfoString(buf, ")");

        /* changing options cannot be combined with other actions */
        return true;
    }

#if (PG_VERSION_NUM >= PG_VERSION_15)
    AppendAlterPublicationAction(buf, stmt->action);
    return AppendPublicationObjects(buf, stmt->pubobjects, whereClauseNeedsTransform,
                                    includeLocalTables);
#else
    AppendDefElemAction(buf, stmt->tableAction);
    return AppendTables(buf, stmt->tables, includeLocalTables);
#endif
}


#if (PG_VERSION_NUM >= PG_VERSION_15)

/*
 * AppendAlterPublicationAction appends a string representing an AlterPublicationAction
 * to a buffer.
 */
static void
AppendAlterPublicationAction(StringInfo buf, AlterPublicationAction action)
{
    switch (action)
    {
        case AP_AddObjects:
        {
            appendStringInfoString(buf, " ADD");
            break;
        }

        case AP_DropObjects:
        {
            appendStringInfoString(buf, " DROP");
            break;
        }

        case AP_SetObjects:
        {
            appendStringInfoString(buf, " SET");
            break;
        }

        default:
        {
            ereport(ERROR, (errmsg("unrecognized publication action: %d", action)));
        }
    }
}


#else

/*
 * AppendDefElemAction appends a string representing a DefElemAction
 * to a buffer.
 */
static void
AppendDefElemAction(StringInfo buf, DefElemAction action)
{
    switch (action)
    {
        case DEFELEM_ADD:
        {
            appendStringInfoString(buf, " ADD");
            break;
        }

        case DEFELEM_DROP:
        {
            appendStringInfoString(buf, " DROP");
            break;
        }

        case DEFELEM_SET:
        {
            appendStringInfoString(buf, " SET");
            break;
        }

        default:
        {
            ereport(ERROR, (errmsg("unrecognized publication action: %d", action)));
        }
    }
}


#endif


/*
 * DeparseDropPublicationStmt builds and returns a string representing the DropStmt
 */
char *
DeparseDropPublicationStmt(Node *node)
{
    DropStmt *stmt = castNode(DropStmt, node);
    StringInfoData str = { 0 };
    initStringInfo(&str);

    Assert(stmt->removeType == OBJECT_PUBLICATION);

    AppendDropPublicationStmt(&str, stmt);

    return str.data;
}


/*
 * AppendDropPublicationStmt appends a string representing the DropStmt to a buffer
 */
static void
AppendDropPublicationStmt(StringInfo buf, DropStmt *stmt)
{
    appendStringInfoString(buf, "DROP PUBLICATION ");
    if (stmt->missing_ok)
    {
        appendStringInfoString(buf, "IF EXISTS ");
    }
    AppendIdentifierList(buf, stmt->objects);
    if (stmt->behavior == DROP_CASCADE)
    {
        appendStringInfoString(buf, " CASCADE");
    }
}


/*
 * DeparseRenamePublicationStmt builds and returns a string representing the RenameStmt
 */
char *
DeparseRenamePublicationStmt(Node *node)
{
    RenameStmt *stmt = castNode(RenameStmt, node);
    StringInfoData str = { 0 };
    initStringInfo(&str);

    Assert(stmt->renameType == OBJECT_PUBLICATION);

    AppendRenamePublicationStmt(&str, stmt);

    return str.data;
}


/*
 * AppendRenamePublicationStmt appends a string representing the RenameStmt to a buffer
 */
static void
AppendRenamePublicationStmt(StringInfo buf, RenameStmt *stmt)
{
    appendStringInfo(buf, "ALTER PUBLICATION %s RENAME TO %s;",
                     quote_identifier(strVal(stmt->object)),
                     quote_identifier(stmt->newname));
}


/*
 * DeparseAlterPublicationOwnerStmt builds and returns a string representing the AlterOwnerStmt
 */
char *
DeparseAlterPublicationOwnerStmt(Node *node)
{
    AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
    StringInfoData str = { 0 };
    initStringInfo(&str);

    Assert(stmt->objectType == OBJECT_PUBLICATION);

    AppendAlterPublicationOwnerStmt(&str, stmt);

    return str.data;
}


/*
 * AppendAlterPublicationOwnerStmt appends a string representing the AlterOwnerStmt to a buffer
 */
static void
AppendAlterPublicationOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt)
{
    Assert(stmt->objectType == OBJECT_PUBLICATION);

    appendStringInfo(buf, "ALTER PUBLICATION %s OWNER TO %s;",
                     quote_identifier(strVal(stmt->object)),
                     RoleSpecString(stmt->newowner, true));
}


/*
 * AppendPublicationOptions appends a string representing a list of publication options.
 */
static void
AppendPublicationOptions(StringInfo stringBuffer, List *optionList)
{
    ListCell *optionCell = NULL;
    bool firstOptionPrinted = false;

    foreach(optionCell, optionList)
    {
        DefElem *option = (DefElem *) lfirst(optionCell);
        char *optionName = option->defname;
        char *optionValue = defGetString(option);
        NodeTag valueType = nodeTag(option->arg);

        if (firstOptionPrinted)
        {
            appendStringInfo(stringBuffer, ", ");
        }
        firstOptionPrinted = true;

        appendStringInfo(stringBuffer, "%s = ",
                         quote_identifier(optionName));

#if (PG_VERSION_NUM >= PG_VERSION_15)
        if (valueType == T_Integer || valueType == T_Float || valueType == T_Boolean)
#else
        if (valueType == T_Integer || valueType == T_Float)
#endif
        {
            /* string escaping is unnecessary for numeric types and can cause issues */
            appendStringInfo(stringBuffer, "%s", optionValue);
        }
        else
        {
            appendStringInfo(stringBuffer, "%s", quote_literal_cstr(optionValue));
        }
    }
}


/*
 * AppendIdentifierList appends a string representing a list of
 * identifiers (of String type).
 */
static void
AppendIdentifierList(StringInfo buf, List *objects)
{
    ListCell *objectCell = NULL;

    foreach(objectCell, objects)
    {
        char *name = strVal(lfirst(objectCell));

        if (objectCell != list_head(objects))
        {
            appendStringInfo(buf, ", ");
        }

        appendStringInfoString(buf, quote_identifier(name));
    }
}
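
A hedged round-trip sketch for the new deparser (the statement variable is
assumed to come from the parser): on PG15+ the output can carry column lists and
row filters, e.g. CREATE PUBLICATION pub FOR TABLE public.users (id, name)
WHERE (id > 10) WITH (publish = 'insert'), and non-Citus tables are filtered out
unless includeLocalTables is set.

    /* hypothetical: stmt is a CreatePublicationStmt built by the parser */
    char *ddlCommand = DeparseCreatePublicationStmt((Node *) stmt);
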
@ -0,0 +1,119 @@
/*-------------------------------------------------------------------------
 *
 * qualify_publication_stmt.c
 *    Functions specialized in fully qualifying all publication statements. These
 *    functions are dispatched from qualify.c
 *
 * Copyright (c), Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "catalog/namespace.h"
#include "distributed/deparser.h"
#include "distributed/listutils.h"
#include "nodes/nodes.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"

#if (PG_VERSION_NUM >= PG_VERSION_15)
static void QualifyPublicationObjects(List *publicationObjects);
#else
static void QualifyTables(List *tables);
#endif
static void QualifyPublicationRangeVar(RangeVar *publication);


/*
 * QualifyCreatePublicationStmt qualifies the publication names of the
 * CREATE PUBLICATION statement.
 */
void
QualifyCreatePublicationStmt(Node *node)
{
    CreatePublicationStmt *stmt = castNode(CreatePublicationStmt, node);

#if (PG_VERSION_NUM >= PG_VERSION_15)
    QualifyPublicationObjects(stmt->pubobjects);
#else
    QualifyTables(stmt->tables);
#endif
}


#if (PG_VERSION_NUM >= PG_VERSION_15)

/*
 * QualifyPublicationObjects ensures all table names in a list of
 * publication objects are fully qualified.
 */
static void
QualifyPublicationObjects(List *publicationObjects)
{
    PublicationObjSpec *publicationObject = NULL;

    foreach_ptr(publicationObject, publicationObjects)
    {
        if (publicationObject->pubobjtype == PUBLICATIONOBJ_TABLE)
        {
            /* FOR TABLE ... */
            PublicationTable *publicationTable = publicationObject->pubtable;

            QualifyPublicationRangeVar(publicationTable->relation);
        }
    }
}


#else

/*
 * QualifyTables ensures all table names in a list are fully qualified.
 */
static void
QualifyTables(List *tables)
{
    RangeVar *rangeVar = NULL;

    foreach_ptr(rangeVar, tables)
    {
        QualifyPublicationRangeVar(rangeVar);
    }
}


#endif


/*
 * QualifyAlterPublicationStmt ensures all table names in the
 * ALTER PUBLICATION statement are fully qualified.
 */
void
QualifyAlterPublicationStmt(Node *node)
{
    AlterPublicationStmt *stmt = castNode(AlterPublicationStmt, node);

#if (PG_VERSION_NUM >= PG_VERSION_15)
    QualifyPublicationObjects(stmt->pubobjects);
#else
    QualifyTables(stmt->tables);
#endif
}


/*
 * QualifyPublicationRangeVar qualifies the given publication RangeVar if it is not qualified.
 */
static void
QualifyPublicationRangeVar(RangeVar *publication)
{
    if (publication->schemaname == NULL)
    {
        Oid publicationOid = RelnameGetRelid(publication->relname);
        Oid schemaOid = get_rel_namespace(publicationOid);
        publication->schemaname = get_namespace_name(schemaOid);
    }
}
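
A sketch of the qualify step (the statement variable is assumed): qualification
mutates the parse tree in place, so a statement parsed from
"ALTER PUBLICATION pub ADD TABLE users" deparses as "... ADD TABLE public.users"
afterwards, keeping the propagated DDL independent of search_path.

    QualifyAlterPublicationStmt((Node *) stmt);
    char *qualifiedCommand = DeparseAlterPublicationStmt((Node *) stmt);
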
@ -53,6 +53,7 @@
 #include "common/keywords.h"
 #include "distributed/citus_nodefuncs.h"
 #include "distributed/citus_ruleutils.h"
+#include "distributed/multi_router_planner.h"
 #include "executor/spi.h"
 #include "foreign/foreign.h"
 #include "funcapi.h"
@ -3723,7 +3724,6 @@ static void
 get_merge_query_def(Query *query, deparse_context *context)
 {
 	StringInfo buf = context->buf;
-	RangeTblEntry *targetRte;

 	/* Insert the WITH clause if given */
 	get_with_clause(query, context);

@ -3731,7 +3731,7 @@ get_merge_query_def(Query *query, deparse_context *context)
 	/*
 	 * Start the query with MERGE INTO <target>
 	 */
-	targetRte = rt_fetch(query->resultRelation, query->rtable);
+	RangeTblEntry *targetRte = ExtractResultRelationRTE(query);

 	if (PRETTY_INDENT(context))
 	{

@ -3853,6 +3853,15 @@ get_merge_query_def(Query *query, deparse_context *context)
 		}
 	}

+	/*
+	 * RETURNING is not supported in MERGE, so it must be NULL. If PG adds it
+	 * later we might miss it, so raise an exception to investigate.
+	 */
+	if (unlikely(query->returningList))
+	{
+		elog(ERROR, "Unexpected RETURNING clause in MERGE");
+	}
+
 	ereport(DEBUG1, (errmsg("<Deparsed MERGE query: %s>", buf->data)));
 }
File diff suppressed because it is too large
@ -9,6 +9,7 @@
  *-------------------------------------------------------------------------
  */
 #include "distributed/distributed_execution_locks.h"
+#include "distributed/executor_util.h"
 #include "distributed/listutils.h"
 #include "distributed/coordinator_protocol.h"
 #include "distributed/metadata_cache.h"

@ -19,6 +20,259 @@
 #include "distributed/transaction_management.h"
+
+
+/*
+ * AcquireExecutorShardLocksForExecution acquires advisory lock on shard IDs
+ * to prevent unsafe concurrent modifications of shards.
+ *
+ * We prevent concurrent modifications of shards in two cases:
+ * 1. Any non-commutative writes to a replicated table
+ * 2. Multi-shard writes that are executed in parallel
+ *
+ * The first case ensures we do not apply updates in different orders on
+ * different replicas (e.g. of a reference table), which could lead the
+ * replicas to diverge.
+ *
+ * The second case prevents deadlocks due to out-of-order execution.
+ *
+ * There are two GUCs that can override the default behaviors.
+ * 'citus.all_modifications_commutative' relaxes locking
+ * that's done for the purpose of keeping replicas consistent.
+ * 'citus.enable_deadlock_prevention' relaxes locking done for
+ * the purpose of avoiding deadlocks between concurrent
+ * multi-shard commands.
+ *
+ * We do not take executor shard locks for utility commands such as
+ * TRUNCATE because the table locks already prevent concurrent access.
+ */
+void
+AcquireExecutorShardLocksForExecution(RowModifyLevel modLevel, List *taskList)
+{
+	if (modLevel <= ROW_MODIFY_READONLY &&
+		!SelectForUpdateOnReferenceTable(taskList))
+	{
+		/*
+		 * Executor locks only apply to DML commands and SELECT FOR UPDATE queries
+		 * touching reference tables.
+		 */
+		return;
+	}
+
+	bool requiresParallelExecutionLocks =
+		!(list_length(taskList) == 1 || ShouldRunTasksSequentially(taskList));
+
+	bool modifiedTableReplicated = ModifiedTableReplicated(taskList);
+	if (!modifiedTableReplicated && !requiresParallelExecutionLocks)
+	{
+		/*
+		 * When a distributed query runs on tables with replication
+		 * factor == 1 and the command hits only a single shard, we
+		 * rely on Postgres to handle the serialization of the
+		 * concurrent modifications on the workers.
+		 *
+		 * For reference tables, even if their placements are replicated
+		 * ones (e.g., single node), we acquire the distributed execution
+		 * locks to be consistent when new node(s) are added. So, they
+		 * do not return at this point.
+		 */
+		return;
+	}
+
+	/*
+	 * We first assume that all the remaining modifications are going to
+	 * be serialized. So, start with an ExclusiveLock and lower the lock level
+	 * as much as possible.
+	 */
+	int lockMode = ExclusiveLock;
+
+	/*
+	 * In addition to honouring commutativity rules, we currently only
+	 * allow a single multi-shard command on a shard at a time. Otherwise,
+	 * concurrent multi-shard commands may take row-level locks on the
+	 * shard placements in a different order and create a distributed
+	 * deadlock. This applies even when writes are commutative and/or
+	 * there is no replication. This can be relaxed via
+	 * EnableDeadlockPrevention.
+	 *
+	 * 1. If citus.all_modifications_commutative is set to true, then all locks
+	 * are acquired as RowExclusiveLock.
+	 *
+	 * 2. If citus.all_modifications_commutative is false, then only the shards
+	 * with more than one replicas are locked with ExclusiveLock. Otherwise, the
+	 * lock is acquired with ShareUpdateExclusiveLock.
+	 *
+	 * ShareUpdateExclusiveLock conflicts with itself such that only one
+	 * multi-shard modification at a time is allowed on a shard. It also conflicts
+	 * with ExclusiveLock, which ensures that updates/deletes/upserts are applied
+	 * in the same order on all placements. It does not conflict with
+	 * RowExclusiveLock, which is normally obtained by single-shard, commutative
+	 * writes.
+	 */
+	if (!modifiedTableReplicated && requiresParallelExecutionLocks)
+	{
+		/*
+		 * When there is no replication then we only need to prevent
+		 * concurrent multi-shard commands on the same shards. This is
+		 * because concurrent, parallel commands may modify the same
+		 * set of shards, but in different orders. The order of the
+		 * accesses might trigger distributed deadlocks that are not
+		 * possible to happen on non-distributed systems such as
+		 * regular Postgres.
+		 *
+		 * As an example, assume that we have two queries: query-1 and query-2.
+		 * Both queries access shard-1 and shard-2. If query-1 first accesses
+		 * shard-1 then shard-2, and query-2 accesses shard-2 then shard-1, these
+		 * two commands might block each other in case they modify the same rows
+		 * (e.g., cause distributed deadlocks).
+		 *
+		 * In either case, ShareUpdateExclusive has the desired effect, since
+		 * it conflicts with itself and ExclusiveLock (taken by non-commutative
+		 * writes).
+		 *
+		 * However, some users find this too restrictive, so we allow them to
+		 * reduce to a RowExclusiveLock when citus.enable_deadlock_prevention
+		 * is disabled, which lets multi-shard modifications run in parallel as
+		 * long as they all disable the GUC.
+		 */
+		lockMode =
+			EnableDeadlockPrevention ? ShareUpdateExclusiveLock : RowExclusiveLock;
+
+		if (!IsCoordinator())
+		{
+			/*
+			 * We also skip taking a heavy-weight lock when running multi-shard
+			 * commands from workers, since we currently do not prevent concurrency
+			 * across workers anyway.
+			 */
+			lockMode = RowExclusiveLock;
+		}
+	}
+	else if (modifiedTableReplicated)
+	{
+		/*
+		 * When we are executing distributed queries on replicated tables, our
+		 * default behaviour is to prevent any concurrency. This holds whether
+		 * or not parallel execution is happening.
+		 *
+		 * The reason is that we cannot control the order of the placement accesses
+		 * of two distributed queries to the same shards. The order of the accesses
+		 * might cause the replicas of the same shard placements to diverge. This is
+		 * not possible on non-distributed systems such as regular Postgres.
+		 *
+		 * As an example, assume that we have two queries: query-1 and query-2.
+		 * Both queries only access the placements of shard-1, say p-1 and p-2.
+		 *
+		 * And, assume that these queries are non-commutative, such as:
+		 *  query-1: UPDATE table SET b = 1 WHERE key = 1;
+		 *  query-2: UPDATE table SET b = 2 WHERE key = 1;
+		 *
+		 * If query-1 accesses p-1 then p-2, and query-2 accesses
+		 * p-2 then p-1, these two commands would leave p-1 and p-2
+		 * diverged (e.g., the values for the column "b" would be different).
+		 *
+		 * The only exception to this rule is single-shard commutative
+		 * modifications, such as INSERTs. In that case, we can allow
+		 * concurrency among such backends, hence lowering the lock level
+		 * to RowExclusiveLock.
+		 */
+		if (!requiresParallelExecutionLocks && modLevel < ROW_MODIFY_NONCOMMUTATIVE)
+		{
+			lockMode = RowExclusiveLock;
+		}
+	}
+
+	if (AllModificationsCommutative)
+	{
+		/*
+		 * The mapping is overridden when all_modifications_commutative is set to true.
+		 * In that case, all modifications are treated as commutative, which can be used
+		 * to communicate that the application is only generating commutative
+		 * UPDATE/DELETE/UPSERT commands and exclusive locks are unnecessary. This
+		 * is irrespective of single-shard/multi-shard or replicated tables.
+		 */
+		lockMode = RowExclusiveLock;
+	}
+
+	/* now, iterate on the tasks and acquire the executor locks on the shards */
+	List *anchorShardIntervalList = NIL;
+	List *relationRowLockList = NIL;
+	List *requiresConsistentSnapshotRelationShardList = NIL;
+
+	Task *task = NULL;
+	foreach_ptr(task, taskList)
+	{
+		ShardInterval *anchorShardInterval = LoadShardInterval(task->anchorShardId);
+		anchorShardIntervalList = lappend(anchorShardIntervalList, anchorShardInterval);
+
+		/* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */
+		AcquireExecutorShardLocksForRelationRowLockList(task->relationRowLockList);
+
+		relationRowLockList =
+			list_concat(relationRowLockList,
+						task->relationRowLockList);
+
+		/*
+		 * If the task has a subselect, then we may need to lock the shards from which
+		 * the query selects as well to prevent the subselects from seeing different
+		 * results on different replicas.
+		 */
+		if (RequiresConsistentSnapshot(task))
+		{
+			/*
+			 * ExclusiveLock conflicts with all lock types used by modifications
+			 * and therefore prevents other modifications from running
+			 * concurrently.
+			 */
+			requiresConsistentSnapshotRelationShardList =
+				list_concat(requiresConsistentSnapshotRelationShardList,
+							task->relationShardList);
+		}
+	}
+
+	/*
+	 * Acquire the locks in a sorted way to avoid deadlocks due to lock
+	 * ordering across concurrent sessions.
+	 */
+	anchorShardIntervalList =
+		SortList(anchorShardIntervalList, CompareShardIntervalsById);
+
+	/*
+	 * If we are dealing with a partition we are also taking locks on the parent table
+	 * to prevent deadlocks on concurrent operations on a partition and its parent.
+	 *
+	 * Note that this function currently does not acquire any remote locks as that
+	 * is necessary to control the concurrency across multiple nodes for replicated
+	 * tables. That is because Citus currently does not allow modifications to
+	 * partitions from any node other than the coordinator.
+	 */
+	LockParentShardResourceIfPartition(anchorShardIntervalList, lockMode);
+
+	/* Acquire distribution execution locks on the affected shards */
+	SerializeNonCommutativeWrites(anchorShardIntervalList, lockMode);
+
+	if (relationRowLockList != NIL)
+	{
+		/* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */
+		AcquireExecutorShardLocksForRelationRowLockList(relationRowLockList);
+	}
+
+	if (requiresConsistentSnapshotRelationShardList != NIL)
+	{
+		/*
+		 * If the task has a subselect, then we may need to lock the shards from which
+		 * the query selects as well to prevent the subselects from seeing different
+		 * results on different replicas.
+		 *
+		 * ExclusiveLock conflicts with all lock types used by modifications
+		 * and therefore prevents other modifications from running
+		 * concurrently.
+		 */
+		LockRelationShardResources(requiresConsistentSnapshotRelationShardList,
+								   ExclusiveLock);
+	}
+}
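To summarize the branches above, a rough decision table (an editorial sketch, not code from the patch):

/*
 * target table     parallel?   overrides                       -> lockMode
 * --------------------------------------------------------------------------
 * not replicated   yes         deadlock prevention on          -> ShareUpdateExclusiveLock
 * not replicated   yes         deadlock prevention off,
 *                              or running on a worker          -> RowExclusiveLock
 * replicated       no          commutative (low modLevel)      -> RowExclusiveLock
 * replicated       otherwise                                   -> ExclusiveLock
 * any              any         all_modifications_commutative   -> RowExclusiveLock
 */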

 /*
  * RequiresConsistentSnapshot returns true if the given task needs to take
  * the necessary locks to ensure that a subquery in the modify query

@ -188,3 +442,27 @@ LockPartitionRelations(Oid relationId, LOCKMODE lockMode)
 		LockRelationOid(partitionRelationId, lockMode);
 	}
 }
+
+
+/*
+ * LockPartitionsForDistributedPlan ensures commands take locks on all partitions
+ * of a distributed table that appears in the query. We do this primarily out of
+ * consistency with PostgreSQL locking.
+ */
+void
+LockPartitionsForDistributedPlan(DistributedPlan *plan)
+{
+	if (TaskListModifiesDatabase(plan->modLevel, plan->workerJob->taskList))
+	{
+		Oid targetRelationId = plan->targetRelationId;
+
+		LockPartitionsInRelationList(list_make1_oid(targetRelationId), RowExclusiveLock);
+	}
+
+	/*
+	 * Lock partitions of tables that appear in a SELECT or subquery. In the
+	 * DML case this also includes the target relation, but since we already
+	 * have a stronger lock this doesn't do any harm.
+	 */
+	LockPartitionsInRelationList(plan->relationIdList, AccessShareLock);
+}
@ -0,0 +1,101 @@
/*-------------------------------------------------------------------------
 *
 * executor_util_params.c
 *
 * Utility functions for dealing with parameters in the executor.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"

#include "distributed/executor_util.h"
#include "utils/lsyscache.h"


/*
 * ExtractParametersForRemoteExecution extracts parameter types and values from
 * the given ParamListInfo structure, and fills parameter type and value arrays.
 * It changes the OIDs of custom types to InvalidOid so that they are the same on
 * workers and coordinators.
 */
void
ExtractParametersForRemoteExecution(ParamListInfo paramListInfo, Oid **parameterTypes,
									const char ***parameterValues)
{
	ExtractParametersFromParamList(paramListInfo, parameterTypes,
								   parameterValues, false);
}


/*
 * ExtractParametersFromParamList extracts parameter types and values from
 * the given ParamListInfo structure, and fills parameter type and value arrays.
 * If useOriginalCustomTypeOids is true, it uses the original OIDs for custom types.
 */
void
ExtractParametersFromParamList(ParamListInfo paramListInfo,
							   Oid **parameterTypes,
							   const char ***parameterValues, bool
							   useOriginalCustomTypeOids)
{
	int parameterCount = paramListInfo->numParams;

	*parameterTypes = (Oid *) palloc0(parameterCount * sizeof(Oid));
	*parameterValues = (const char **) palloc0(parameterCount * sizeof(char *));

	/* get parameter types and values */
	for (int parameterIndex = 0; parameterIndex < parameterCount; parameterIndex++)
	{
		ParamExternData *parameterData = &paramListInfo->params[parameterIndex];
		Oid typeOutputFunctionId = InvalidOid;
		bool variableLengthType = false;

		/*
		 * Use 0 for data types where the oid values can be different on
		 * the coordinator and worker nodes. Therefore, the worker nodes can
		 * infer the correct oid.
		 */
		if (parameterData->ptype >= FirstNormalObjectId && !useOriginalCustomTypeOids)
		{
			(*parameterTypes)[parameterIndex] = 0;
		}
		else
		{
			(*parameterTypes)[parameterIndex] = parameterData->ptype;
		}

		/*
		 * If the parameter is not referenced / used (ptype == 0) and
		 * would otherwise have errored out inside standard_planner(),
		 * don't pass a value to the remote side, and pass the text oid to
		 * prevent undetermined data type errors on workers.
		 */
		if (parameterData->ptype == 0)
		{
			(*parameterValues)[parameterIndex] = NULL;
			(*parameterTypes)[parameterIndex] = TEXTOID;

			continue;
		}

		/*
		 * If the parameter is NULL then we preserve its type, but
		 * don't need to evaluate its value.
		 */
		if (parameterData->isnull)
		{
			(*parameterValues)[parameterIndex] = NULL;

			continue;
		}

		getTypeOutputInfo(parameterData->ptype, &typeOutputFunctionId,
						  &variableLengthType);

		(*parameterValues)[parameterIndex] = OidOutputFunctionCall(typeOutputFunctionId,
																   parameterData->value);
	}
}
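A usage sketch (not part of the patch; paramListInfo is assumed to come from the executor):

Oid *parameterTypes = NULL;
const char **parameterValues = NULL;

/* custom type OIDs come back as 0 so that workers resolve them locally */
ExtractParametersForRemoteExecution(paramListInfo, &parameterTypes,
									&parameterValues);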
@ -0,0 +1,297 @@
/*-------------------------------------------------------------------------
 *
 * executor_util_tasks.c
 *
 * Utility functions for dealing with task lists in the executor.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"

#include "distributed/executor_util.h"
#include "distributed/listutils.h"
#include "distributed/shardinterval_utils.h"


/*
 * TaskListModifiesDatabase is a helper function for DistributedExecutionModifiesDatabase and
 * DistributedPlanModifiesDatabase.
 */
bool
TaskListModifiesDatabase(RowModifyLevel modLevel, List *taskList)
{
	if (modLevel > ROW_MODIFY_READONLY)
	{
		return true;
	}

	/*
	 * If we cannot decide by only checking the row modify level,
	 * we should look closer to the tasks.
	 */
	if (list_length(taskList) < 1)
	{
		/* is this ever possible? */
		return false;
	}

	Task *firstTask = (Task *) linitial(taskList);

	return !ReadOnlyTask(firstTask->taskType);
}


/*
 * TaskListRequiresRollback returns true if the distributed
 * execution should start a CoordinatedTransaction. In other words, if the
 * function returns true, the execution sends BEGIN; to every connection
 * involved in the distributed execution.
 */
bool
TaskListRequiresRollback(List *taskList)
{
	int taskCount = list_length(taskList);

	if (taskCount == 0)
	{
		return false;
	}

	Task *task = (Task *) linitial(taskList);
	if (task->cannotBeExecutedInTransction)
	{
		/* vacuum, create index concurrently etc. */
		return false;
	}

	bool selectForUpdate = task->relationRowLockList != NIL;
	if (selectForUpdate)
	{
		/*
		 * Do not check SelectOpensTransactionBlock, always open transaction block
		 * if SELECT FOR UPDATE is executed inside a distributed transaction.
		 */
		return IsMultiStatementTransaction();
	}

	if (ReadOnlyTask(task->taskType))
	{
		return SelectOpensTransactionBlock &&
			   IsTransactionBlock();
	}

	if (IsMultiStatementTransaction())
	{
		return true;
	}

	if (list_length(taskList) > 1)
	{
		return true;
	}

	if (list_length(task->taskPlacementList) > 1)
	{
		/*
		 * Single DML/DDL tasks with replicated tables (including
		 * reference and non-reference tables) should require
		 * BEGIN/COMMIT/ROLLBACK.
		 */
		return true;
	}

	if (task->queryCount > 1)
	{
		/*
		 * When there are multiple sequential queries in a task
		 * we need to run those as a transaction.
		 */
		return true;
	}

	return false;
}


/*
 * TaskListRequires2PC determines whether the given task list requires 2PC.
 */
bool
TaskListRequires2PC(List *taskList)
{
	if (taskList == NIL)
	{
		return false;
	}

	Task *task = (Task *) linitial(taskList);
	if (ReadOnlyTask(task->taskType))
	{
		/* we do not trigger 2PC for read-only queries */
		return false;
	}

	bool singleTask = list_length(taskList) == 1;
	if (singleTask && list_length(task->taskPlacementList) == 1)
	{
		/* we do not trigger 2PC for modifications that are:
		 *    - single task
		 *    - single placement
		 */
		return false;
	}

	/*
	 * Otherwise, all modifications are done via 2PC. This includes:
	 *    - Multi-shard commands irrespective of the replication factor
	 *    - Single-shard commands that are targeting more than one replica
	 */
	return true;
}
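A brief illustration of the 2PC rule above (a sketch, not part of the patch; the task variables and list contents are assumed):

/* A single-task, single-placement INSERT commits with a plain COMMIT,
 * while a two-shard UPDATE goes through two-phase commit. */
List *singleShardInsertTasks = list_make1(insertTask);   /* 1 task, 1 placement */
List *multiShardUpdateTasks = list_make2(updateTask1, updateTask2);

Assert(!TaskListRequires2PC(singleShardInsertTasks));
Assert(TaskListRequires2PC(multiShardUpdateTasks));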

/*
 * TaskListCannotBeExecutedInTransaction returns true if any of the
 * tasks in the input cannot be executed in a transaction. These are
 * tasks like VACUUM or CREATE INDEX CONCURRENTLY etc.
 */
bool
TaskListCannotBeExecutedInTransaction(List *taskList)
{
	Task *task = NULL;
	foreach_ptr(task, taskList)
	{
		if (task->cannotBeExecutedInTransction)
		{
			return true;
		}
	}

	return false;
}


/*
 * SelectForUpdateOnReferenceTable returns true if the input task
 * contains a FOR UPDATE clause that locks any reference tables.
 */
bool
SelectForUpdateOnReferenceTable(List *taskList)
{
	if (list_length(taskList) != 1)
	{
		/* we currently do not support SELECT FOR UPDATE on multi task queries */
		return false;
	}

	Task *task = (Task *) linitial(taskList);
	RelationRowLock *relationRowLock = NULL;
	foreach_ptr(relationRowLock, task->relationRowLockList)
	{
		Oid relationId = relationRowLock->relationId;

		if (IsCitusTableType(relationId, REFERENCE_TABLE))
		{
			return true;
		}
	}

	return false;
}


/*
 * ReadOnlyTask returns true if the input task does a read-only operation
 * on the database.
 */
bool
ReadOnlyTask(TaskType taskType)
{
	switch (taskType)
	{
		case READ_TASK:
		case MAP_OUTPUT_FETCH_TASK:
		case MAP_TASK:
		case MERGE_TASK:
		{
			return true;
		}

		default:
		{
			return false;
		}
	}
}


/*
 * ModifiedTableReplicated iterates on the task list and returns true
 * if any of the tasks' anchor shard is a replicated table. We qualify
 * replicated tables as any reference table or any distributed table with
 * replication factor > 1.
 */
bool
ModifiedTableReplicated(List *taskList)
{
	Task *task = NULL;
	foreach_ptr(task, taskList)
	{
		int64 shardId = task->anchorShardId;

		if (shardId == INVALID_SHARD_ID)
		{
			continue;
		}

		if (ReferenceTableShardId(shardId))
		{
			return true;
		}

		Oid relationId = RelationIdForShard(shardId);
		if (!SingleReplicatedTable(relationId))
		{
			return true;
		}
	}

	return false;
}


/*
 * ShouldRunTasksSequentially returns true if each of the individual tasks
 * should be executed one by one. Note that this is different from the
 * MultiShardConnectionType == SEQUENTIAL_CONNECTION case. In that case,
 * running the tasks across the nodes in parallel is acceptable and implemented
 * in that way.
 *
 * However, the executions that are qualified here would perform poorly if the
 * tasks across the workers are executed in parallel. We currently qualify only
 * one class of distributed queries here, multi-row INSERTs. If we do not enforce
 * true sequential execution, concurrent multi-row upserts could easily form
 * a distributed deadlock when the upserts touch the same rows.
 */
bool
ShouldRunTasksSequentially(List *taskList)
{
	if (list_length(taskList) < 2)
	{
		/* single task plans are already qualified as sequential by definition */
		return false;
	}

	/* all the tasks are the same, so we only look at the first one */
	Task *initialTask = (Task *) linitial(taskList);
	if (initialTask->rowValuesLists != NIL)
	{
		/* found a multi-row INSERT */
		return true;
	}

	return false;
}
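For concreteness, a sketch (not part of the patch; the table name is assumed) of the kind of statement ShouldRunTasksSequentially singles out:

/*
 * A multi-row INSERT such as
 *
 *   INSERT INTO dist_table (key, value) VALUES (1, 'a'), (2, 'b');
 *
 * produces tasks whose rowValuesLists is non-NIL, so the executor runs
 * them one by one instead of in parallel across workers.
 */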
@ -0,0 +1,129 @@
/*-------------------------------------------------------------------------
 *
 * executor_util_tuples.c
 *
 * Utility functions for handling tuples during remote execution.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"

#include "distributed/executor_util.h"
#include "utils/lsyscache.h"


/*
 * TupleDescGetAttBinaryInMetadata - Build an AttInMetadata structure based on
 * the supplied TupleDesc. AttInMetadata can be used in conjunction with
 * fmStringInfos containing binary encoded types to produce a properly formed
 * tuple.
 *
 * NOTE: This function is a copy of the PG function TupleDescGetAttInMetadata,
 * except that it uses getTypeBinaryInputInfo instead of getTypeInputInfo.
 */
AttInMetadata *
TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc)
{
	int natts = tupdesc->natts;
	int i;
	Oid atttypeid;
	Oid attinfuncid;

	AttInMetadata *attinmeta = (AttInMetadata *) palloc(sizeof(AttInMetadata));

	/* "Bless" the tupledesc so that we can make rowtype datums with it */
	attinmeta->tupdesc = BlessTupleDesc(tupdesc);

	/*
	 * Gather info needed later to call the "in" function for each attribute
	 */
	FmgrInfo *attinfuncinfo = (FmgrInfo *) palloc0(natts * sizeof(FmgrInfo));
	Oid *attioparams = (Oid *) palloc0(natts * sizeof(Oid));
	int32 *atttypmods = (int32 *) palloc0(natts * sizeof(int32));

	for (i = 0; i < natts; i++)
	{
		Form_pg_attribute att = TupleDescAttr(tupdesc, i);

		/* Ignore dropped attributes */
		if (!att->attisdropped)
		{
			atttypeid = att->atttypid;
			getTypeBinaryInputInfo(atttypeid, &attinfuncid, &attioparams[i]);
			fmgr_info(attinfuncid, &attinfuncinfo[i]);
			atttypmods[i] = att->atttypmod;
		}
	}
	attinmeta->attinfuncs = attinfuncinfo;
	attinmeta->attioparams = attioparams;
	attinmeta->atttypmods = atttypmods;

	return attinmeta;
}


/*
 * BuildTupleFromBytes - build a HeapTuple given user data in binary form.
 * values is an array of StringInfos, one for each attribute of the return
 * tuple. A NULL StringInfo pointer indicates we want to create a NULL field.
 *
 * NOTE: This function is a copy of the PG function BuildTupleFromCStrings,
 * except that it uses ReceiveFunctionCall instead of InputFunctionCall.
 */
HeapTuple
BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values)
{
	TupleDesc tupdesc = attinmeta->tupdesc;
	int natts = tupdesc->natts;
	int i;

	Datum *dvalues = (Datum *) palloc(natts * sizeof(Datum));
	bool *nulls = (bool *) palloc(natts * sizeof(bool));

	/*
	 * Call the "in" function for each non-dropped attribute, even for nulls,
	 * to support domains.
	 */
	for (i = 0; i < natts; i++)
	{
		if (!TupleDescAttr(tupdesc, i)->attisdropped)
		{
			/* Non-dropped attributes */
			dvalues[i] = ReceiveFunctionCall(&attinmeta->attinfuncs[i],
											 values[i],
											 attinmeta->attioparams[i],
											 attinmeta->atttypmods[i]);
			if (values[i] != NULL)
			{
				nulls[i] = false;
			}
			else
			{
				nulls[i] = true;
			}
		}
		else
		{
			/* Handle dropped attributes by setting to NULL */
			dvalues[i] = (Datum) 0;
			nulls[i] = true;
		}
	}

	/*
	 * Form a tuple
	 */
	HeapTuple tuple = heap_form_tuple(tupdesc, dvalues, nulls);

	/*
	 * Release locally palloc'd space. XXX would probably be good to pfree
	 * values of pass-by-reference datums, as well.
	 */
	pfree(dvalues);
	pfree(nulls);

	return tuple;
}
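A minimal usage sketch (not from the patch; tupleDescriptor and the per-column binary buffers are assumed to come from the caller):

/* Build binary-input metadata once per result set, then form tuples from
 * binary-encoded column values received from a worker. */
AttInMetadata *attInMetadata = TupleDescGetAttBinaryInMetadata(tupleDescriptor);

/* columnBuffers[i] holds the binary value of column i, or NULL for SQL NULL */
HeapTuple heapTuple = BuildTupleFromBytes(attInMetadata, columnBuffers);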
@ -409,11 +409,13 @@ ExecutePlanIntoColocatedIntermediateResults(Oid targetRelationId,
 													columnNameList);

 	/* set up a DestReceiver that copies into the intermediate table */
+	const bool publishableData = true;
 	CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId,
 																  columnNameList,
 																  partitionColumnIndex,
 																  executorState,
-																  intermediateResultIdPrefix);
+																  intermediateResultIdPrefix,
+																  publishableData);

 	ExecutePlanIntoDestReceiver(selectPlan, paramListInfo, (DestReceiver *) copyDest);

@ -443,10 +445,12 @@ ExecutePlanIntoRelation(Oid targetRelationId, List *insertTargetList,
 													columnNameList);

 	/* set up a DestReceiver that copies into the distributed table */
+	const bool publishableData = true;
 	CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId,
 																  columnNameList,
 																  partitionColumnIndex,
-																  executorState, NULL);
+																  executorState, NULL,
+																  publishableData);

 	ExecutePlanIntoDestReceiver(selectPlan, paramListInfo, (DestReceiver *) copyDest);
@ -90,6 +90,7 @@
 #include "distributed/local_executor.h"
 #include "distributed/local_plan_cache.h"
 #include "distributed/coordinator_protocol.h"
+#include "distributed/executor_util.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/multi_executor.h"
 #include "distributed/multi_server_executor.h"
@ -802,6 +802,11 @@ GetObjectTypeString(ObjectType objType)
 			return "function";
 		}

+		case OBJECT_PUBLICATION:
+		{
+			return "publication";
+		}
+
 		case OBJECT_SCHEMA:
 		{
 			return "schema";
@ -132,6 +132,7 @@ typedef struct ViewDependencyNode
 static List * GetRelationSequenceDependencyList(Oid relationId);
 static List * GetRelationFunctionDependencyList(Oid relationId);
 static List * GetRelationTriggerFunctionDependencyList(Oid relationId);
+static List * GetPublicationRelationsDependencyList(Oid relationId);
 static List * GetRelationStatsSchemaDependencyList(Oid relationId);
 static List * GetRelationIndicesDependencyList(Oid relationId);
 static DependencyDefinition * CreateObjectAddressDependencyDef(Oid classId, Oid objectId);

@ -722,6 +723,11 @@ SupportedDependencyByCitus(const ObjectAddress *address)
 			return true;
 		}

+		case OCLASS_PUBLICATION:
+		{
+			return true;
+		}
+
 		case OCLASS_TSCONFIG:
 		{
 			return true;

@ -1656,6 +1662,36 @@ ExpandCitusSupportedTypes(ObjectAddressCollector *collector, ObjectAddress targe
 				List *ruleRefDepList = GetViewRuleReferenceDependencyList(relationId);
 				result = list_concat(result, ruleRefDepList);
 			}
+
+			break;
+		}
+
+		case PublicationRelationId:
+		{
+			Oid publicationId = target.objectId;
+
+			/*
+			 * Publications do not depend directly on relations, because dropping
+			 * the relation will only remove it from the publications. However,
+			 * we add a dependency to ensure the relation is created first when
+			 * adding a node.
+			 */
+			List *relationDependencyList =
+				GetPublicationRelationsDependencyList(publicationId);
+			result = list_concat(result, relationDependencyList);
+
+			/*
+			 * As of PostgreSQL 15, the same applies to schemas.
+			 */
+#if PG_VERSION_NUM >= PG_VERSION_15
+			List *schemaIdList =
+				GetPublicationSchemas(publicationId);
+			List *schemaDependencyList =
+				CreateObjectAddressDependencyDefList(NamespaceRelationId, schemaIdList);
+			result = list_concat(result, schemaDependencyList);
+#endif
+
+			break;
 		}

 		default:

@ -1834,7 +1870,7 @@ static List *
 GetRelationSequenceDependencyList(Oid relationId)
 {
 	List *seqInfoList = NIL;
-	GetDependentSequencesWithRelation(relationId, &seqInfoList, 0);
+	GetDependentSequencesWithRelation(relationId, &seqInfoList, 0, DEPENDENCY_AUTO);

 	List *seqIdList = NIL;
 	SequenceInfo *seqInfo = NULL;

@ -1923,6 +1959,33 @@ GetRelationTriggerFunctionDependencyList(Oid relationId)
 }

+
+/*
+ * GetPublicationRelationsDependencyList creates a list of ObjectAddressDependencies for
+ * a publication on the Citus relations it contains. This helps make sure we distribute
+ * Citus tables before local tables.
+ */
+static List *
+GetPublicationRelationsDependencyList(Oid publicationId)
+{
+	List *allRelationIds = GetPublicationRelations(publicationId, PUBLICATION_PART_ROOT);
+	List *citusRelationIds = NIL;
+
+	Oid relationId = InvalidOid;
+
+	foreach_oid(relationId, allRelationIds)
+	{
+		if (!IsCitusTable(relationId))
+		{
+			continue;
+		}
+
+		citusRelationIds = lappend_oid(citusRelationIds, relationId);
+	}
+
+	return CreateObjectAddressDependencyDefList(RelationRelationId, citusRelationIds);
+}
+
+
 /*
  * GetTypeConstraintDependencyDefinition creates a list of constraint dependency
  * definitions for a given type
@ -311,7 +311,7 @@ static void InvalidateDistTableCache(void);
 static void InvalidateDistObjectCache(void);
 static bool InitializeTableCacheEntry(int64 shardId, bool missingOk);
 static bool IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
-									 CitusTableType tableType);
+									 uint32 colocationId, CitusTableType tableType);
 static bool RefreshTableCacheEntryIfInvalid(ShardIdCacheEntry *shardEntry, bool
 											missingOk);

@ -450,7 +450,36 @@ bool
 IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tableType)
 {
 	return IsCitusTableTypeInternal(tableEntry->partitionMethod,
-									tableEntry->replicationModel, tableType);
+									tableEntry->replicationModel,
+									tableEntry->colocationId, tableType);
+}
+
+
+/*
+ * HasDistributionKey returns true if the given Citus table has a
+ * distribution key.
+ */
+bool
+HasDistributionKey(Oid relationId)
+{
+	CitusTableCacheEntry *tableEntry = LookupCitusTableCacheEntry(relationId);
+	if (tableEntry == NULL)
+	{
+		ereport(ERROR, (errmsg("relation with oid %u is not a Citus table", relationId)));
+	}
+
+	return HasDistributionKeyCacheEntry(tableEntry);
+}
+
+
+/*
+ * HasDistributionKeyCacheEntry returns true if the given cache entry
+ * identifies a Citus table that has a distribution key.
+ */
+bool
+HasDistributionKeyCacheEntry(CitusTableCacheEntry *tableEntry)
+{
+	return tableEntry->partitionMethod != DISTRIBUTE_BY_NONE;
 }
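A small sketch (not from the patch; the relation variable is assumed) of where the new predicate is useful:

/* Tables without a distribution key (reference and citus local tables)
 * have a single shard, so there is no distribution column to prune on. */
if (!HasDistributionKey(relationId))
{
	/* route to the only shard instead of pruning on a distribution column */
}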
@ -460,7 +489,7 @@ IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tabl
  */
 static bool
 IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
-						 CitusTableType tableType)
+						 uint32 colocationId, CitusTableType tableType)
 {
 	switch (tableType)
 	{

@ -501,12 +530,8 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
 		case CITUS_LOCAL_TABLE:
 		{
 			return partitionMethod == DISTRIBUTE_BY_NONE &&
-				   replicationModel != REPLICATION_MODEL_2PC;
-		}
-
-		case CITUS_TABLE_WITH_NO_DIST_KEY:
-		{
-			return partitionMethod == DISTRIBUTE_BY_NONE;
+				   replicationModel != REPLICATION_MODEL_2PC &&
+				   colocationId == INVALID_COLOCATION_ID;
 		}

 		case ANY_CITUS_TABLE_TYPE:

@ -529,33 +554,21 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
 char *
 GetTableTypeName(Oid tableId)
 {
-	bool regularTable = false;
-	char partitionMethod = ' ';
-	char replicationModel = ' ';
-	if (IsCitusTable(tableId))
-	{
-		CitusTableCacheEntry *referencingCacheEntry = GetCitusTableCacheEntry(tableId);
-		partitionMethod = referencingCacheEntry->partitionMethod;
-		replicationModel = referencingCacheEntry->replicationModel;
-	}
-	else
-	{
-		regularTable = true;
-	}
-
-	if (regularTable)
+	if (!IsCitusTable(tableId))
 	{
 		return "regular table";
 	}
-	else if (partitionMethod == 'h')
+
+	CitusTableCacheEntry *tableCacheEntry = GetCitusTableCacheEntry(tableId);
+	if (IsCitusTableTypeCacheEntry(tableCacheEntry, HASH_DISTRIBUTED))
 	{
 		return "distributed table";
 	}
-	else if (partitionMethod == 'n' && replicationModel == 't')
+	else if (IsCitusTableTypeCacheEntry(tableCacheEntry, REFERENCE_TABLE))
 	{
 		return "reference table";
 	}
-	else if (partitionMethod == 'n' && replicationModel != 't')
+	else if (IsCitusTableTypeCacheEntry(tableCacheEntry, CITUS_LOCAL_TABLE))
 	{
 		return "citus local table";
 	}

@ -577,6 +590,18 @@ IsCitusTable(Oid relationId)
 }

+
+/*
+ * IsCitusTableRangeVar returns whether the table named in the given
+ * rangeVar is a Citus table.
+ */
+bool
+IsCitusTableRangeVar(RangeVar *rangeVar, LOCKMODE lockMode, bool missingOK)
+{
+	Oid relationId = RangeVarGetRelid(rangeVar, lockMode, missingOK);
+	return IsCitusTable(relationId);
+}
+
+
 /*
  * IsCitusTableViaCatalog returns whether the given relation is a
  * distributed table or not.

@ -765,14 +790,28 @@ PgDistPartitionTupleViaCatalog(Oid relationId)

 /*
- * IsCitusLocalTableByDistParams returns true if given partitionMethod and
- * replicationModel would identify a citus local table.
+ * IsReferenceTableByDistParams returns true if given partitionMethod and
+ * replicationModel would identify a reference table.
  */
 bool
-IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel)
+IsReferenceTableByDistParams(char partitionMethod, char replicationModel)
 {
 	return partitionMethod == DISTRIBUTE_BY_NONE &&
-		   replicationModel != REPLICATION_MODEL_2PC;
+		   replicationModel == REPLICATION_MODEL_2PC;
+}
+
+
+/*
+ * IsCitusLocalTableByDistParams returns true if given partitionMethod,
+ * replicationModel and colocationId would identify a citus local table.
+ */
+bool
+IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel,
+							  uint32 colocationId)
+{
+	return partitionMethod == DISTRIBUTE_BY_NONE &&
+		   replicationModel != REPLICATION_MODEL_2PC &&
+		   colocationId == INVALID_COLOCATION_ID;
 }
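A compact summary of how the dist params now classify tables (an editorial sketch, not in the patch; 'n' is assumed to stand for DISTRIBUTE_BY_NONE and 't' for REPLICATION_MODEL_2PC in pg_dist_partition):

/*
 * partitionMethod  replicationModel  colocationId            -> table type
 * 'n'              't'               (any)                   -> reference table
 * 'n'              != 't'            INVALID_COLOCATION_ID   -> citus local table
 * 'h'              (any)             (any)                   -> hash-distributed table
 */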
@ -4837,11 +4876,14 @@ CitusTableTypeIdList(CitusTableType citusTableType)

 		Datum partMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1];
 		Datum replicationModelDatum = datumArray[Anum_pg_dist_partition_repmodel - 1];
+		Datum colocationIdDatum = datumArray[Anum_pg_dist_partition_colocationid - 1];

 		Oid partitionMethod = DatumGetChar(partMethodDatum);
 		Oid replicationModel = DatumGetChar(replicationModelDatum);
+		uint32 colocationId = DatumGetUInt32(colocationIdDatum);

-		if (IsCitusTableTypeInternal(partitionMethod, replicationModel, citusTableType))
+		if (IsCitusTableTypeInternal(partitionMethod, replicationModel, colocationId,
+									 citusTableType))
 		{
 			Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1];
File diff suppressed because it is too large
@ -985,7 +985,7 @@ AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval)

 	appendStringInfo(selectQuery, "SELECT " UINT64_FORMAT " AS shard_id, ", shardId);
 	appendStringInfo(selectQuery, "%s AS shard_name, ", quotedShardName);
-	appendStringInfo(selectQuery, PG_RELATION_SIZE_FUNCTION, quotedShardName);
+	appendStringInfo(selectQuery, PG_TOTAL_RELATION_SIZE_FUNCTION, quotedShardName);
 }
@ -1670,6 +1670,48 @@ TupleToGroupShardPlacement(TupleDesc tupleDescriptor, HeapTuple heapTuple)
 }

+
+/*
+ * LookupTaskPlacementHostAndPort sets the nodename and nodeport for the given
+ * task placement with a lookup.
+ */
+void
+LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName,
+							   int *nodePort)
+{
+	if (IsDummyPlacement(taskPlacement))
+	{
+		/*
+		 * If we create a dummy placement for the local node, it is possible
+		 * that the entry doesn't exist in pg_dist_node, hence a lookup will fail.
+		 * In that case we want to use the dummy placement's values.
+		 */
+		*nodeName = taskPlacement->nodeName;
+		*nodePort = taskPlacement->nodePort;
+	}
+	else
+	{
+		/*
+		 * We want to look up the node information again since it is possible that
+		 * there were changes in pg_dist_node and we will get those invalidations
+		 * in LookupNodeForGroup.
+		 */
+		WorkerNode *workerNode = LookupNodeForGroup(taskPlacement->groupId);
+		*nodeName = workerNode->workerName;
+		*nodePort = workerNode->workerPort;
+	}
+}
+
+
+/*
+ * IsDummyPlacement returns true if the given placement is a dummy placement.
+ */
+bool
+IsDummyPlacement(ShardPlacement *taskPlacement)
+{
+	return taskPlacement->nodeId == LOCAL_NODE_ID;
+}
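A usage sketch (not part of the patch; the placement variable is assumed):

char *nodeName = NULL;
int nodePort = 0;

/* resolves against pg_dist_node unless the placement is a local dummy */
LookupTaskPlacementHostAndPort(taskPlacement, &nodeName, &nodePort);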
 /*
  * InsertShardRow opens the shard system catalog, and inserts a new row with the
  * given values into that system catalog. Note that we allow the user to pass in
File diff suppressed because it is too large
@ -425,6 +425,7 @@ ErrorIfCurrentUserCanNotDistributeObject(char *textType, ObjectType type,
 		case OBJECT_COLLATION:
 		case OBJECT_VIEW:
 		case OBJECT_ROLE:
+		case OBJECT_PUBLICATION:
 		{
 			check_object_ownership(userId, type, *addr, node, *relation);
 			break;
@@ -215,6 +215,7 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
 {
 	bool colocatedShard = true;
 	List *insertedShardPlacements = NIL;
+	List *insertedShardIds = NIL;
 
 	/* make sure that tables are hash partitioned */
 	CheckHashPartitionedTable(targetRelationId);
@@ -254,7 +255,9 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
 	foreach_ptr(sourceShardInterval, sourceShardIntervalList)
 	{
 		uint64 sourceShardId = sourceShardInterval->shardId;
-		uint64 newShardId = GetNextShardId();
+		uint64 *newShardIdPtr = (uint64 *) palloc0(sizeof(uint64));
+		*newShardIdPtr = GetNextShardId();
+		insertedShardIds = lappend(insertedShardIds, newShardIdPtr);
 
 		int32 shardMinValue = DatumGetInt32(sourceShardInterval->minValue);
 		int32 shardMaxValue = DatumGetInt32(sourceShardInterval->maxValue);
@@ -263,7 +266,7 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
 		List *sourceShardPlacementList = ShardPlacementListSortedByWorker(
 			sourceShardId);
 
-		InsertShardRow(targetRelationId, newShardId, targetShardStorageType,
+		InsertShardRow(targetRelationId, *newShardIdPtr, targetShardStorageType,
 					   shardMinValueText, shardMaxValueText);
 
 		ShardPlacement *sourcePlacement = NULL;
@@ -272,21 +275,26 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
 			int32 groupId = sourcePlacement->groupId;
 			const uint64 shardSize = 0;
 
-			/*
-			 * Optimistically add shard placement row the pg_dist_shard_placement, in case
-			 * of any error it will be roll-backed.
-			 */
-			uint64 shardPlacementId = InsertShardPlacementRow(newShardId,
-															  INVALID_PLACEMENT_ID,
-															  shardSize,
-															  groupId);
-
-			ShardPlacement *shardPlacement = LoadShardPlacement(newShardId,
-																shardPlacementId);
-			insertedShardPlacements = lappend(insertedShardPlacements, shardPlacement);
+			InsertShardPlacementRow(*newShardIdPtr,
+									INVALID_PLACEMENT_ID,
+									shardSize,
+									groupId);
 		}
 	}
 
+	/*
+	 * load shard placements for the shard at once after all placement insertions
+	 * finished. That prevents MetadataCache from rebuilding unnecessarily after
+	 * each placement insertion.
+	 */
+	uint64 *shardIdPtr;
+	foreach_ptr(shardIdPtr, insertedShardIds)
+	{
+		List *placementsForShard = ShardPlacementList(*shardIdPtr);
+		insertedShardPlacements = list_concat(insertedShardPlacements,
+											  placementsForShard);
+	}
+
 	CreateShardsOnWorkers(targetRelationId, insertedShardPlacements,
 						  useExclusiveConnections, colocatedShard);
 }
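
The CreateColocatedShards change above is a two-phase pattern: record every new shard id while the rows are inserted, then load the placements for all shards in a single pass, so the metadata cache invalidated by the inserts is rebuilt once rather than once per placement. Below is a minimal, self-contained sketch of that batching idea; the ids and the printf stand-ins for the catalog calls are illustrative, not the Citus API.

    /*
     * Phase 1 records an id per shard and performs all "inserts"; phase 2
     * resolves the inserted rows in one batch, after every insert is done.
     */
    #include <stdio.h>
    #include <stdlib.h>

    #define NUM_SHARDS 4

    int
    main(void)
    {
        unsigned long long *insertedShardIds[NUM_SHARDS];

        /* phase 1: allocate an id per shard and "insert" its row */
        for (int i = 0; i < NUM_SHARDS; i++)
        {
            insertedShardIds[i] = malloc(sizeof(unsigned long long));
            *insertedShardIds[i] = 102000 + i; /* GetNextShardId() stand-in */
            printf("insert shard row for shard %llu\n", *insertedShardIds[i]);
        }

        /* phase 2: resolve all placements at once, rebuilding caches only once */
        for (int i = 0; i < NUM_SHARDS; i++)
        {
            printf("load placements for shard %llu\n", *insertedShardIds[i]);
            free(insertedShardIds[i]);
        }

        return 0;
    }
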
@@ -461,10 +461,7 @@ ResolveRelationId(text *relationName, bool missingOk)
  * definition, optional column storage and statistics definitions, and index
  * constraint and trigger definitions.
  * When IncludeIdentities is NO_IDENTITY, the function does not include identity column
- * specifications. When it's INCLUDE_IDENTITY_AS_SEQUENCE_DEFAULTS, the function
- * uses sequences and set them as default values for identity columns by using exactly
- * the same approach with worker_nextval('sequence') & nextval('sequence') logic
- * desribed above. When it's INCLUDE_IDENTITY it creates GENERATED .. AS IDENTIY clauses.
+ * specifications. When it's INCLUDE_IDENTITY, it creates GENERATED .. AS IDENTITY clauses.
  */
 List *
 GetFullTableCreationCommands(Oid relationId,
@@ -500,6 +497,15 @@ GetFullTableCreationCommands(Oid relationId,
 			tableDDLEventList = lappend(tableDDLEventList,
 										truncateTriggerCommand);
 		}
+
+		/*
+		 * For identity column sequences, we only need to modify
+		 * their min/max values to produce unique values on the worker nodes.
+		 */
+		List *identitySequenceDependencyCommandList =
+			IdentitySequenceDependencyCommandList(relationId);
+		tableDDLEventList = list_concat(tableDDLEventList,
+										identitySequenceDependencyCommandList);
 	}
 
 	tableDDLEventList = list_concat(tableDDLEventList, postLoadCreationCommandList);
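
The identity-column commands appended above only adjust each sequence's min/max values so different nodes hand out non-overlapping identity values. The sketch below shows one way disjoint per-node ranges can be carved out of the signed 64-bit space; the splitting arithmetic is an assumption for illustration and is not necessarily what IdentitySequenceDependencyCommandList emits.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Give each node an equal, disjoint slice of the bigint range so identity
     * columns never collide across nodes. Purely illustrative arithmetic.
     */
    static void
    NodeSequenceRange(int nodeId, int nodeCount, int64_t *rangeMin, int64_t *rangeMax)
    {
        int64_t sliceSize = INT64_MAX / nodeCount;
        *rangeMin = (int64_t) nodeId * sliceSize + 1;
        *rangeMax = (nodeId == nodeCount - 1) ? INT64_MAX : (*rangeMin + sliceSize - 1);
    }

    int
    main(void)
    {
        int64_t min, max;
        for (int nodeId = 0; nodeId < 3; nodeId++)
        {
            NodeSequenceRange(nodeId, 3, &min, &max);
            printf("node %d: MINVALUE %lld MAXVALUE %lld\n",
                   nodeId, (long long) min, (long long) max);
        }
        return 0;
    }
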
@@ -190,6 +190,21 @@ typedef struct WorkerShardStatistics
 	HTAB *statistics;
 } WorkerShardStatistics;
 
+/*
+ * ShardMoveDependencyInfo contains the taskId which any new shard move task
+ * within the corresponding colocation group must take a dependency on.
+ */
+typedef struct ShardMoveDependencyInfo
+{
+	int64 key;
+	int64 taskId;
+} ShardMoveDependencyInfo;
+
+typedef struct ShardMoveDependencies
+{
+	HTAB *colocationDependencies;
+	HTAB *nodeDependencies;
+} ShardMoveDependencies;
+
 char *VariablesToBePassedToNewConnections = NULL;
 
 /* static declarations for main logic */
@@ -475,6 +488,7 @@ GetRebalanceSteps(RebalanceOptions *options)
 	/* sort the lists to make the function more deterministic */
 	List *activeWorkerList = SortedActiveWorkers();
 	List *activeShardPlacementListList = NIL;
+	List *unbalancedShards = NIL;
 
 	Oid relationId = InvalidOid;
 	foreach_oid(relationId, options->relationIdList)
@@ -490,8 +504,29 @@ GetRebalanceSteps(RebalanceOptions *options)
 														 shardPlacementList, options->workerNode);
 		}
 
-		activeShardPlacementListList =
-			lappend(activeShardPlacementListList, activeShardPlacementListForRelation);
+		if (list_length(activeShardPlacementListForRelation) >= list_length(
+				activeWorkerList))
+		{
+			activeShardPlacementListList = lappend(activeShardPlacementListList,
+												   activeShardPlacementListForRelation);
+		}
+		else
+		{
+			/*
+			 * If the number of shard groups is less than the number of worker nodes,
+			 * at least one of the worker nodes will remain empty. For such cases,
+			 * we consider those shard groups as a colocation group and try to
+			 * distribute them across the cluster.
+			 */
+			unbalancedShards = list_concat(unbalancedShards,
+										   activeShardPlacementListForRelation);
+		}
+	}
+
+	if (list_length(unbalancedShards) > 0)
+	{
+		activeShardPlacementListList = lappend(activeShardPlacementListList,
+											   unbalancedShards);
 	}
 
 	if (options->threshold < options->rebalanceStrategy->minimumThreshold)
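
The new branch in GetRebalanceSteps asks, per colocation group: are there at least as many shard groups as active workers? If not, that group alone can never cover every worker, so its placements are pooled into unbalancedShards and appended as one combined group to balance. A toy, self-contained illustration of that decision, with counts standing in for the placement lists:

    #include <stdio.h>

    /* Toy version of the branch: pool small colocation groups together. */
    int
    main(void)
    {
        int workerCount = 4;
        int shardGroupCounts[] = { 8, 2, 1 }; /* shard groups per colocation group */
        int pooled = 0;

        for (int i = 0; i < 3; i++)
        {
            if (shardGroupCounts[i] >= workerCount)
            {
                printf("group %d: balanced on its own (%d shard groups)\n",
                       i, shardGroupCounts[i]);
            }
            else
            {
                /* fewer shard groups than workers: some worker would stay empty */
                pooled += shardGroupCounts[i];
            }
        }

        if (pooled > 0)
        {
            printf("pooled %d shard groups are balanced as one combined group\n",
                   pooled);
        }
        return 0;
    }
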
@@ -1796,10 +1831,10 @@ static void
 RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid)
 {
 	char transferMode = LookupShardTransferMode(shardReplicationModeOid);
-	EnsureReferenceTablesExistOnAllNodesExtended(transferMode);
 
 	if (list_length(options->relationIdList) == 0)
 	{
+		EnsureReferenceTablesExistOnAllNodesExtended(transferMode);
 		return;
 	}
 
@@ -1814,6 +1849,25 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid)
 
 	List *placementUpdateList = GetRebalanceSteps(options);
 
+	if (transferMode == TRANSFER_MODE_AUTOMATIC)
+	{
+		/*
+		 * If the shard transfer mode is set to auto, we should check beforehand
+		 * if we are able to use logical replication to transfer shards or not.
+		 * We throw an error if any of the tables do not have a replica identity, which
+		 * is required for logical replication to replicate UPDATE and DELETE commands.
+		 */
+		PlacementUpdateEvent *placementUpdate = NULL;
+		foreach_ptr(placementUpdate, placementUpdateList)
+		{
+			Oid relationId = RelationIdForShard(placementUpdate->shardId);
+			List *colocatedTableList = ColocatedTableList(relationId);
+			VerifyTablesHaveReplicaIdentity(colocatedTableList);
+		}
+	}
+
+	EnsureReferenceTablesExistOnAllNodesExtended(transferMode);
+
 	if (list_length(placementUpdateList) == 0)
 	{
 		return;
@@ -1857,6 +1911,137 @@ ErrorOnConcurrentRebalance(RebalanceOptions *options)
 }
 
 
+/*
+ * GetColocationId returns the colocationId of the shard in a PlacementUpdateEvent.
+ */
+static int64
+GetColocationId(PlacementUpdateEvent *move)
+{
+	ShardInterval *shardInterval = LoadShardInterval(move->shardId);
+
+	CitusTableCacheEntry *citusTableCacheEntry = GetCitusTableCacheEntry(
+		shardInterval->relationId);
+
+	return citusTableCacheEntry->colocationId;
+}
+
+
+/*
+ * InitializeShardMoveDependencies creates the hash maps that we use to track
+ * the latest moves so that subsequent moves with the same properties must take a dependency
+ * on them. There are two hash maps. One is for tracking the latest move scheduled in a
+ * given colocation group and the other one is for tracking the latest move which involves
+ * a given node either as its source node or its target node.
+ */
+static ShardMoveDependencies
+InitializeShardMoveDependencies()
+{
+	ShardMoveDependencies shardMoveDependencies;
+	shardMoveDependencies.colocationDependencies = CreateSimpleHashWithNameAndSize(int64,
+																				   ShardMoveDependencyInfo,
+																				   "colocationDependencyHashMap",
+																				   6);
+	shardMoveDependencies.nodeDependencies = CreateSimpleHashWithNameAndSize(int64,
+																			 ShardMoveDependencyInfo,
+																			 "nodeDependencyHashMap",
+																			 6);
+
+	return shardMoveDependencies;
+}
+
+
+/*
+ * GenerateTaskMoveDependencyList creates and returns an array of taskIds that
+ * the move must take a dependency on.
+ */
+static int64 *
+GenerateTaskMoveDependencyList(PlacementUpdateEvent *move, int64 colocationId,
+							   ShardMoveDependencies shardMoveDependencies, int *nDepends)
+{
+	HTAB *dependsList = CreateSimpleHashSetWithNameAndSize(int64,
+														   "shardMoveDependencyList", 0);
+
+	bool found;
+
+	/* check if there exists a move in the same colocation group scheduled earlier */
+	ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search(
+		shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER, &found);
+
+	if (found)
+	{
+		hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
+	}
+
+	/*
+	 * Check if there exists a move scheduled earlier whose source or target node
+	 * overlaps with the current move's source node.
+	 */
+	shardMoveDependencyInfo = hash_search(
+		shardMoveDependencies.nodeDependencies, &move->sourceNode->nodeId, HASH_ENTER,
+		&found);
+
+	if (found)
+	{
+		hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
+	}
+
+	/*
+	 * Check if there exists a move scheduled earlier whose source or target node
+	 * overlaps with the current move's target node.
+	 */
+	shardMoveDependencyInfo = hash_search(
+		shardMoveDependencies.nodeDependencies, &move->targetNode->nodeId, HASH_ENTER,
+		&found);
+
+	if (found)
+	{
+		hash_search(dependsList, &shardMoveDependencyInfo->taskId, HASH_ENTER, NULL);
+	}
+
+	*nDepends = hash_get_num_entries(dependsList);
+
+	int64 *dependsArray = NULL;
+
+	if (*nDepends > 0)
+	{
+		HASH_SEQ_STATUS seq;
+
+		dependsArray = palloc((*nDepends) * sizeof(int64));
+
+		hash_seq_init(&seq, dependsList);
+		int i = 0;
+		int64 *dependsTaskId;
+
+		while ((dependsTaskId = (int64 *) hash_seq_search(&seq)) != NULL)
+		{
+			dependsArray[i++] = *dependsTaskId;
+		}
+	}
+
+	return dependsArray;
+}
+
+
+/*
+ * UpdateShardMoveDependencies updates the dependency maps with the latest move's taskId.
+ */
+static void
+UpdateShardMoveDependencies(PlacementUpdateEvent *move, uint64 colocationId, int64 taskId,
+							ShardMoveDependencies shardMoveDependencies)
+{
+	ShardMoveDependencyInfo *shardMoveDependencyInfo = hash_search(
+		shardMoveDependencies.colocationDependencies, &colocationId, HASH_ENTER, NULL);
+	shardMoveDependencyInfo->taskId = taskId;
+
+	shardMoveDependencyInfo = hash_search(shardMoveDependencies.nodeDependencies,
										  &move->sourceNode->nodeId, HASH_ENTER, NULL);
+
+	shardMoveDependencyInfo->taskId = taskId;
+
+	shardMoveDependencyInfo = hash_search(shardMoveDependencies.nodeDependencies,
										  &move->targetNode->nodeId, HASH_ENTER, NULL);
+
+	shardMoveDependencyInfo->taskId = taskId;
+}
+
+
 /*
  * RebalanceTableShardsBackground rebalances the shards for the relations
  * inside the relationIdList across the different workers. It does so using our
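
Together, these helpers give each scheduled move at most three predecessors: the latest move in its colocation group plus the latest moves touching its source and its target node, deduplicated. The self-contained simulation below mirrors that rule with plain arrays standing in for the two hash maps; the ids and array sizes are illustrative.

    #include <stdio.h>

    #define MAX_KEYS 16

    static long colocationLatest[MAX_KEYS]; /* colocationId -> latest taskId */
    static long nodeLatest[MAX_KEYS];       /* nodeId -> latest taskId */

    /* collect up to three deduplicated predecessor task ids for a move */
    static int
    collect_depends(long *out, int colocationId, int srcNode, int dstNode)
    {
        long candidates[3] = { colocationLatest[colocationId],
                               nodeLatest[srcNode], nodeLatest[dstNode] };
        int n = 0;
        for (int i = 0; i < 3; i++)
        {
            int duplicate = 0;
            for (int j = 0; j < n; j++)
            {
                duplicate |= (out[j] == candidates[i]);
            }
            if (candidates[i] != 0 && !duplicate)
            {
                out[n++] = candidates[i];
            }
        }
        return n;
    }

    int
    main(void)
    {
        /* moves 0 and 1 share a colocation group; move 2 shares node 3 with move 0 */
        struct { int colo, src, dst; } moves[] = { { 1, 2, 3 }, { 1, 4, 5 }, { 2, 3, 6 } };
        long nextTaskId = 100;

        for (int i = 0; i < 3; i++)
        {
            long depends[3];
            int nDepends = collect_depends(depends, moves[i].colo,
                                           moves[i].src, moves[i].dst);
            long taskId = nextTaskId++;

            printf("move %d -> task %ld, depends on:", i, taskId);
            for (int j = 0; j < nDepends; j++)
            {
                printf(" %ld", depends[j]);
            }
            printf("\n");

            /* record this task as the latest for its group and both nodes */
            colocationLatest[moves[i].colo] = taskId;
            nodeLatest[moves[i].src] = taskId;
            nodeLatest[moves[i].dst] = taskId;
        }
        return 0;
    }
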
@@ -1894,12 +2079,6 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationModeOid)
 		EnsureTableOwner(colocatedTableId);
 	}
 
-	if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
-	{
-		/* make sure that all tables included in the rebalance have a replica identity*/
-		VerifyTablesHaveReplicaIdentity(colocatedTableList);
-	}
-
 	List *placementUpdateList = GetRebalanceSteps(options);
 
 	if (list_length(placementUpdateList) == 0)
@@ -1908,6 +2087,23 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationModeOid)
 		return 0;
 	}
 
+	if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
+	{
+		/*
+		 * If the shard transfer mode is set to auto, we should check beforehand
+		 * if we are able to use logical replication to transfer shards or not.
+		 * We throw an error if any of the tables do not have a replica identity, which
+		 * is required for logical replication to replicate UPDATE and DELETE commands.
+		 */
+		PlacementUpdateEvent *placementUpdate = NULL;
+		foreach_ptr(placementUpdate, placementUpdateList)
+		{
+			relationId = RelationIdForShard(placementUpdate->shardId);
+			List *colocatedTables = ColocatedTableList(relationId);
+			VerifyTablesHaveReplicaIdentity(colocatedTables);
+		}
+	}
+
 	DropOrphanedResourcesInSeparateTransaction();
 
 	/* find the name of the shard transfer mode to interpolate in the scheduled command */
@@ -1922,18 +2118,8 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationModeOid)
 	StringInfoData buf = { 0 };
 	initStringInfo(&buf);
 
-	/*
-	 * Currently we only have two tasks that any move can depend on:
-	 *  - replicating reference tables
-	 *  - the previous move
-	 *
	 * prevJobIdx tells what slot to write the id of the task into. We only use both slots
-	 * if we are actually replicating reference tables.
-	 */
-	int64 prevJobId[2] = { 0 };
-	int prevJobIdx = 0;
-
 	List *referenceTableIdList = NIL;
+	int64 replicateRefTablesTaskId = 0;
 
 	if (HasNodesWithMissingReferenceTables(&referenceTableIdList))
 	{
@@ -1949,15 +2135,15 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationModeOid)
 		appendStringInfo(&buf,
 						 "SELECT pg_catalog.replicate_reference_tables(%s)",
 						 quote_literal_cstr(shardTranferModeLabel));
-		BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
-													  prevJobIdx, prevJobId);
-		prevJobId[prevJobIdx] = task->taskid;
-		prevJobIdx++;
+		BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, 0,
+													  NULL);
+		replicateRefTablesTaskId = task->taskid;
 	}
 
 	PlacementUpdateEvent *move = NULL;
-	bool first = true;
-	int prevMoveIndex = prevJobIdx;
+	ShardMoveDependencies shardMoveDependencies = InitializeShardMoveDependencies();
 
 	foreach_ptr(move, placementUpdateList)
 	{
 		resetStringInfo(&buf);
@@ -1969,14 +2155,27 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationModeOid)
 						 move->targetNode->nodeId,
 						 quote_literal_cstr(shardTranferModeLabel));
 
-		BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
-													  prevJobIdx, prevJobId);
-		prevJobId[prevMoveIndex] = task->taskid;
-		if (first)
+		int64 colocationId = GetColocationId(move);
+
+		int nDepends = 0;
+
+		int64 *dependsArray = GenerateTaskMoveDependencyList(move, colocationId,
+															 shardMoveDependencies,
+															 &nDepends);
+
+		if (nDepends == 0 && replicateRefTablesTaskId > 0)
 		{
-			first = false;
-			prevJobIdx++;
+			nDepends = 1;
+			dependsArray = palloc(nDepends * sizeof(int64));
+			dependsArray[0] = replicateRefTablesTaskId;
 		}
+
+		BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data,
+													  nDepends,
+													  dependsArray);
+
+		UpdateShardMoveDependencies(move, colocationId, task->taskid,
+									shardMoveDependencies);
 	}
 
 	ereport(NOTICE,
@@ -70,22 +70,43 @@ typedef struct ShardCommandList
 	List *ddlCommandList;
 } ShardCommandList;
 
+static const char *ShardTransferTypeNames[] = {
+	[SHARD_TRANSFER_INVALID_FIRST] = "unknown",
+	[SHARD_TRANSFER_MOVE] = "move",
+	[SHARD_TRANSFER_COPY] = "copy",
+};
+
+static const char *ShardTransferTypeNamesCapitalized[] = {
+	[SHARD_TRANSFER_INVALID_FIRST] = "unknown",
+	[SHARD_TRANSFER_MOVE] = "Move",
+	[SHARD_TRANSFER_COPY] = "Copy",
+};
+
+static const char *ShardTransferTypeNamesContinuous[] = {
+	[SHARD_TRANSFER_INVALID_FIRST] = "unknown",
+	[SHARD_TRANSFER_MOVE] = "Moving",
+	[SHARD_TRANSFER_COPY] = "Copying",
+};
+
+static const char *ShardTransferTypeFunctionNames[] = {
+	[SHARD_TRANSFER_INVALID_FIRST] = "unknown",
+	[SHARD_TRANSFER_MOVE] = "citus_move_shard_placement",
+	[SHARD_TRANSFER_COPY] = "citus_copy_shard_placement",
+};
+
 /* local function forward declarations */
 static bool CanUseLogicalReplication(Oid relationId, char shardReplicationMode);
 static void ErrorIfTableCannotBeReplicated(Oid relationId);
-static void ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName,
-											   int targetNodePort);
+static void ErrorIfTargetNodeIsNotSafeForTransfer(const char *targetNodeName,
+												  int targetNodePort,
+												  ShardTransferType transferType);
 static void ErrorIfSameNode(char *sourceNodeName, int sourceNodePort,
 							char *targetNodeName, int targetNodePort,
 							const char *operationName);
-static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
-											 int32 sourceNodePort, char *targetNodeName,
-											 int32 targetNodePort,
-											 char shardReplicationMode);
 static void CopyShardTables(List *shardIntervalList, char *sourceNodeName,
 							int32 sourceNodePort, char *targetNodeName,
 							int32 targetNodePort, bool useLogicalReplication,
-							char *operationName);
+							const char *operationName);
 static void CopyShardTablesViaLogicalReplication(List *shardIntervalList,
 												 char *sourceNodeName,
 												 int32 sourceNodePort,
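
Indexing message fragments by ShardTransferType, as the four arrays above do, lets one code path emit correctly worded messages for both moves and copies. A self-contained sketch of the designated-initializer lookup technique; the enum values here are assumed for illustration:

    #include <stdio.h>

    typedef enum ShardTransferType
    {
        SHARD_TRANSFER_INVALID_FIRST = 0,
        SHARD_TRANSFER_MOVE = 1,
        SHARD_TRANSFER_COPY = 2
    } ShardTransferType;

    /* designated initializers keep the enum and the strings in sync */
    static const char *TransferTypeNamesContinuous[] = {
        [SHARD_TRANSFER_INVALID_FIRST] = "unknown",
        [SHARD_TRANSFER_MOVE] = "Moving",
        [SHARD_TRANSFER_COPY] = "Copying",
    };

    int
    main(void)
    {
        /* one format string serves every transfer type */
        ShardTransferType type = SHARD_TRANSFER_COPY;
        printf("%s shards to a non-existing node is not supported\n",
               TransferTypeNamesContinuous[type]);
        return 0;
    }
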
@@ -100,7 +121,7 @@ static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName,
 								   int32 targetNodePort);
 static List * RecreateTableDDLCommandList(Oid relationId);
 static void EnsureTableListOwner(List *tableIdList);
-static void EnsureTableListSuitableForReplication(List *tableIdList);
+static void ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList);
 
 static void DropShardPlacementsFromMetadata(List *shardList,
 											char *nodeName,
@@ -112,12 +133,28 @@ static void UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId,
 														   int32 targetNodePort);
 static bool IsShardListOnNode(List *colocatedShardList, char *targetNodeName,
 							  uint32 targetPort);
+static void SetupRebalanceMonitorForShardTransfer(uint64 shardId, Oid distributedTableId,
+												  char *sourceNodeName,
+												  uint32 sourceNodePort,
+												  char *targetNodeName,
+												  uint32 targetNodePort,
+												  ShardTransferType transferType);
 static void CheckSpaceConstraints(MultiConnection *connection,
 								  uint64 colocationSizeInBytes);
+static void EnsureAllShardsCanBeCopied(List *colocatedShardList,
+									   char *sourceNodeName, uint32 sourceNodePort,
+									   char *targetNodeName, uint32 targetNodePort);
 static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
 											  char *sourceNodeName, uint32 sourceNodePort,
-											  char *targetNodeName, uint32
-											  targetNodePort);
+											  char *targetNodeName, uint32 targetNodePort,
+											  ShardTransferType transferType);
+static bool TransferAlreadyCompleted(List *colocatedShardList,
+									 char *sourceNodeName, uint32 sourceNodePort,
+									 char *targetNodeName, uint32 targetNodePort,
+									 ShardTransferType transferType);
+static void LockColocatedRelationsForMove(List *colocatedTableList);
+static void ErrorIfForeignTableForShardTransfer(List *colocatedTableList,
+												ShardTransferType transferType);
 static List * RecreateShardDDLCommandList(ShardInterval *shardInterval,
 										  const char *sourceNodeName,
 										  int32 sourceNodePort);
@@ -163,9 +200,9 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS)
 
 	char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
 
-	ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
-									 targetNodeName, targetNodePort,
-									 shardReplicationMode);
+	TransferShards(shardId, sourceNodeName, sourceNodePort,
+				   targetNodeName, targetNodePort,
+				   shardReplicationMode, SHARD_TRANSFER_COPY);
 
 	PG_RETURN_VOID();
 }
@@ -192,10 +229,9 @@ citus_copy_shard_placement_with_nodeid(PG_FUNCTION_ARGS)
 
 	char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
 
-	ReplicateColocatedShardPlacement(shardId,
-									 sourceNode->workerName, sourceNode->workerPort,
-									 targetNode->workerName, targetNode->workerPort,
-									 shardReplicationMode);
+	TransferShards(shardId, sourceNode->workerName, sourceNode->workerPort,
+				   targetNode->workerName, targetNode->workerPort,
+				   shardReplicationMode, SHARD_TRANSFER_COPY);
 
 	PG_RETURN_VOID();
 }
@@ -228,9 +264,9 @@ master_copy_shard_placement(PG_FUNCTION_ARGS)
 		ereport(WARNING, (errmsg("do_repair argument is deprecated")));
 	}
 
-	ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort,
-									 targetNodeName, targetNodePort,
-									 shardReplicationMode);
+	TransferShards(shardId, sourceNodeName, sourceNodePort,
+				   targetNodeName, targetNodePort,
+				   shardReplicationMode, SHARD_TRANSFER_COPY);
 
 
 	PG_RETURN_VOID();
@@ -264,9 +300,10 @@ citus_move_shard_placement(PG_FUNCTION_ARGS)
 	int32 targetNodePort = PG_GETARG_INT32(4);
 	Oid shardReplicationModeOid = PG_GETARG_OID(5);
 
-	citus_move_shard_placement_internal(shardId, sourceNodeName, sourceNodePort,
-										targetNodeName, targetNodePort,
-										shardReplicationModeOid);
+	char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
+	TransferShards(shardId, sourceNodeName, sourceNodePort,
+				   targetNodeName, targetNodePort,
+				   shardReplicationMode, SHARD_TRANSFER_MOVE);
 
 	PG_RETURN_VOID();
 }
@@ -291,126 +328,111 @@ citus_move_shard_placement_with_nodeid(PG_FUNCTION_ARGS)
 	WorkerNode *sourceNode = FindNodeWithNodeId(sourceNodeId, missingOk);
 	WorkerNode *targetNode = FindNodeWithNodeId(targetNodeId, missingOk);
 
-	citus_move_shard_placement_internal(shardId, sourceNode->workerName,
-										sourceNode->workerPort, targetNode->workerName,
-										targetNode->workerPort,
-										shardReplicationModeOid);
+	char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
+	TransferShards(shardId, sourceNode->workerName,
				   sourceNode->workerPort, targetNode->workerName,
				   targetNode->workerPort, shardReplicationMode, SHARD_TRANSFER_MOVE);
 
 	PG_RETURN_VOID();
 }
 
 
 /*
- * citus_move_shard_placement_internal is the internal function for shard moves.
+ * TransferShards is the common entry point for shard moves and copies.
  */
 void
-citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
-									int32 sourceNodePort, char *targetNodeName,
-									int32 targetNodePort, Oid shardReplicationModeOid)
+TransferShards(int64 shardId, char *sourceNodeName,
+			   int32 sourceNodePort, char *targetNodeName,
+			   int32 targetNodePort, char shardReplicationMode,
+			   ShardTransferType transferType)
 {
-	ListCell *colocatedTableCell = NULL;
-	ListCell *colocatedShardCell = NULL;
+	/* strings to be used in log messages */
+	const char *operationName = ShardTransferTypeNames[transferType];
+	const char *operationNameCapitalized =
+		ShardTransferTypeNamesCapitalized[transferType];
+	const char *operationFunctionName = ShardTransferTypeFunctionNames[transferType];
 
+	/* cannot transfer shard to the same node */
 	ErrorIfSameNode(sourceNodeName, sourceNodePort,
 					targetNodeName, targetNodePort,
-					"move");
+					operationName);
 
-	Oid relationId = RelationIdForShard(shardId);
-	ErrorIfMoveUnsupportedTableType(relationId);
-	ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort);
-
-	AcquirePlacementColocationLock(relationId, ExclusiveLock, "move");
-
 	ShardInterval *shardInterval = LoadShardInterval(shardId);
 	Oid distributedTableId = shardInterval->relationId;
 
+	/* error if unsupported shard transfer */
+	if (transferType == SHARD_TRANSFER_MOVE)
+	{
+		ErrorIfMoveUnsupportedTableType(distributedTableId);
+	}
+	else if (transferType == SHARD_TRANSFER_COPY)
+	{
+		ErrorIfTableCannotBeReplicated(distributedTableId);
+		EnsureNoModificationsHaveBeenDone();
+	}
+
+	ErrorIfTargetNodeIsNotSafeForTransfer(targetNodeName, targetNodePort, transferType);
+
+	AcquirePlacementColocationLock(distributedTableId, ExclusiveLock, operationName);
+
 	List *colocatedTableList = ColocatedTableList(distributedTableId);
 	List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
 
-	foreach(colocatedTableCell, colocatedTableList)
+	EnsureTableListOwner(colocatedTableList);
+
+	if (transferType == SHARD_TRANSFER_MOVE)
 	{
-		Oid colocatedTableId = lfirst_oid(colocatedTableCell);
-
-		/* check that user has owner rights in all co-located tables */
-		EnsureTableOwner(colocatedTableId);
-
 		/*
 		 * Block concurrent DDL / TRUNCATE commands on the relation. Similarly,
 		 * block concurrent citus_move_shard_placement() on any shard of
 		 * the same relation. This is OK for now since we're executing shard
 		 * moves sequentially anyway.
 		 */
-		LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock);
+		LockColocatedRelationsForMove(colocatedTableList);
+	}
 
-		if (IsForeignTable(relationId))
-		{
-			char *relationName = get_rel_name(colocatedTableId);
-			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							errmsg("cannot move shard"),
-							errdetail("Table %s is a foreign table. Moving "
-									  "shards backed by foreign tables is "
-									  "not supported.", relationName)));
-		}
-	}
+	ErrorIfForeignTableForShardTransfer(colocatedTableList, transferType);
 
-	/* we sort colocatedShardList so that lock operations will not cause any deadlocks */
-	colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
+	if (transferType == SHARD_TRANSFER_COPY)
+	{
+		ErrorIfReplicatingDistributedTableWithFKeys(colocatedTableList);
+	}
 
 	/*
-	 * If there are no active placements on the source and only active placements on
-	 * the target node, we assume the copy to already be done.
+	 * We sort shardIntervalList so that lock operations will not cause any
+	 * deadlocks.
 	 */
-	if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
-		!IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
+	colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
+
+	if (TransferAlreadyCompleted(colocatedShardList,
+								 sourceNodeName, sourceNodePort,
+								 targetNodeName, targetNodePort,
+								 transferType))
 	{
+		/* if the transfer is already completed, we can return right away */
 		ereport(WARNING, (errmsg("shard is already present on node %s:%d",
 								 targetNodeName, targetNodePort),
-						  errdetail("Move may have already completed.")));
+						  errdetail("%s may have already completed.",
+									operationNameCapitalized)));
 		return;
 	}
 
-	foreach(colocatedShardCell, colocatedShardList)
-	{
-		ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell);
-		uint64 colocatedShardId = colocatedShard->shardId;
-
-		EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
-							   targetNodeName, targetNodePort);
-	}
-
-	char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid);
+	EnsureAllShardsCanBeCopied(colocatedShardList, sourceNodeName, sourceNodePort,
+							   targetNodeName, targetNodePort);
+
 	if (shardReplicationMode == TRANSFER_MODE_AUTOMATIC)
 	{
 		VerifyTablesHaveReplicaIdentity(colocatedTableList);
 	}
 
-	EnsureEnoughDiskSpaceForShardMove(colocatedShardList, sourceNodeName, sourceNodePort,
-									  targetNodeName, targetNodePort);
+	EnsureEnoughDiskSpaceForShardMove(colocatedShardList,
									  sourceNodeName, sourceNodePort,
									  targetNodeName, targetNodePort, transferType);
 
-	/*
-	 * We want to be able to track progress of shard moves using
-	 * get_rebalancer_progress. If this move is initiated by the rebalancer,
-	 * then the rebalancer call has already set up the shared memory that is
-	 * used to do that. But if citus_move_shard_placement is called directly by
-	 * the user (or through any other mechanism), then the shared memory is not
-	 * set up yet. In that case we do it here.
-	 */
-	if (!IsRebalancerInternalBackend())
-	{
-		WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
-		WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
-
-		PlacementUpdateEvent *placementUpdateEvent = palloc0(
-			sizeof(PlacementUpdateEvent));
-		placementUpdateEvent->updateType = PLACEMENT_UPDATE_MOVE;
-		placementUpdateEvent->shardId = shardId;
-		placementUpdateEvent->sourceNode = sourceNode;
-		placementUpdateEvent->targetNode = targetNode;
-		SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId,
-							  REBALANCE_PROGRESS_MOVING,
-							  PLACEMENT_UPDATE_STATUS_SETTING_UP);
-	}
+	SetupRebalanceMonitorForShardTransfer(shardId, distributedTableId,
										  sourceNodeName, sourceNodePort,
										  targetNodeName, targetNodePort,
										  transferType);
 
 	UpdatePlacementUpdateStatusForShardIntervalList(
 		colocatedShardList,
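
After this refactor every SQL-facing wrapper funnels into TransferShards, differing only in how it resolves nodes and in the final ShardTransferType argument. A condensed usage sketch; the node names, ports, and surrounding context are placeholders, not taken from the diff:

    /* hypothetical call sites: both operations reduce to one entry point */
    TransferShards(shardId, "10.0.0.1", 5432, "10.0.0.2", 5432,
                   shardReplicationMode, SHARD_TRANSFER_MOVE); /* move */
    TransferShards(shardId, "10.0.0.1", 5432, "10.0.0.2", 5432,
                   shardReplicationMode, SHARD_TRANSFER_COPY); /* copy */
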
@@ -428,7 +450,7 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
 	{
 		BlockWritesToShardList(colocatedShardList);
 	}
-	else
+	else if (transferType == SHARD_TRANSFER_MOVE)
 	{
 		/*
 		 * We prevent multiple shard moves in a transaction that use logical
@@ -452,6 +474,20 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
 		PlacementMovedUsingLogicalReplicationInTX = true;
 	}
 
+	if (transferType == SHARD_TRANSFER_COPY &&
+		!IsCitusTableType(distributedTableId, REFERENCE_TABLE))
+	{
+		/*
+		 * When copying a shard to a new node, we should first ensure that reference
+		 * tables are present such that joins work immediately after copying the shard.
+		 * When copying a reference table, we are probably trying to achieve just that.
+		 *
+		 * Since this is a long-running operation, we do this after the error checks, but
+		 * before taking metadata locks.
+		 */
+		EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode);
+	}
+
 	DropOrphanedResourcesInSeparateTransaction();
 
 	ShardInterval *colocatedShard = NULL;
@@ -466,18 +502,21 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
 		ErrorIfCleanupRecordForShardExists(qualifiedShardName);
 	}
 
-	/*
-	 * CopyColocatedShardPlacement function copies given shard with its co-located
-	 * shards.
-	 */
 	CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName,
-					targetNodePort, useLogicalReplication, "citus_move_shard_placement");
+					targetNodePort, useLogicalReplication, operationFunctionName);
 
-	/* delete old shards metadata and mark the shards as to be deferred drop */
-	int32 sourceGroupId = GroupForNode(sourceNodeName, sourceNodePort);
-	InsertCleanupRecordsForShardPlacementsOnNode(colocatedShardList,
-												 sourceGroupId);
+	if (transferType == SHARD_TRANSFER_MOVE)
+	{
+		/* delete old shards metadata and mark the shards as to be deferred drop */
+		int32 sourceGroupId = GroupForNode(sourceNodeName, sourceNodePort);
+		InsertCleanupRecordsForShardPlacementsOnNode(colocatedShardList,
+													 sourceGroupId);
+	}
 
+	/*
+	 * Finally insert the placements to pg_dist_placement and sync it to the
+	 * metadata workers.
+	 */
 	colocatedShard = NULL;
 	foreach_ptr(colocatedShard, colocatedShardList)
 	{
@@ -488,17 +527,30 @@ citus_move_shard_placement_internal(int64 shardId, char *sourceNodeName,
 		InsertShardPlacementRow(colocatedShardId, placementId,
 								ShardLength(colocatedShardId),
 								groupId);
+
+		if (transferType == SHARD_TRANSFER_COPY &&
+			ShouldSyncTableMetadata(colocatedShard->relationId))
+		{
+			char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId,
+															0, groupId);
+
+			SendCommandToWorkersWithMetadata(placementCommand);
+		}
 	}
 
-	/*
-	 * Since this is move operation, we remove the placements from the metadata
-	 * for the source node after copy.
-	 */
-	DropShardPlacementsFromMetadata(colocatedShardList, sourceNodeName, sourceNodePort);
+	if (transferType == SHARD_TRANSFER_MOVE)
+	{
+		/*
+		 * Since this is a move operation, we remove the placements from the metadata
+		 * for the source node after copy.
+		 */
+		DropShardPlacementsFromMetadata(colocatedShardList,
+										sourceNodeName, sourceNodePort);
 
-	UpdateColocatedShardPlacementMetadataOnWorkers(shardId, sourceNodeName,
-												   sourceNodePort, targetNodeName,
-												   targetNodePort);
+		UpdateColocatedShardPlacementMetadataOnWorkers(shardId, sourceNodeName,
+													   sourceNodePort, targetNodeName,
+													   targetNodePort);
+	}
 
 	UpdatePlacementUpdateStatusForShardIntervalList(
 		colocatedShardList,
@@ -611,6 +663,70 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetNodePort)
 }
 
 
+/*
+ * LockColocatedRelationsForMove takes a list of relations and locks all of them
+ * using ShareUpdateExclusiveLock.
+ */
+static void
+LockColocatedRelationsForMove(List *colocatedTableList)
+{
+	Oid colocatedTableId = InvalidOid;
+	foreach_oid(colocatedTableId, colocatedTableList)
+	{
+		LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock);
+	}
+}
+
+
+/*
+ * ErrorIfForeignTableForShardTransfer takes a list of relations and errors out
+ * if there is a foreign table in the list.
+ */
+static void
+ErrorIfForeignTableForShardTransfer(List *colocatedTableList,
+									ShardTransferType transferType)
+{
+	Oid colocatedTableId = InvalidOid;
+	foreach_oid(colocatedTableId, colocatedTableList)
+	{
+		if (IsForeignTable(colocatedTableId))
+		{
+			char *relationName = get_rel_name(colocatedTableId);
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("cannot %s shard",
+								   ShardTransferTypeNames[transferType]),
+							errdetail("Table %s is a foreign table. "
+									  "%s shards backed by foreign tables is "
+									  "not supported.", relationName,
+									  ShardTransferTypeNamesContinuous[transferType])));
+		}
+	}
+}
+
+
+/*
+ * EnsureAllShardsCanBeCopied is a wrapper around EnsureShardCanBeCopied.
+ */
+static void
+EnsureAllShardsCanBeCopied(List *colocatedShardList,
+						   char *sourceNodeName, uint32 sourceNodePort,
+						   char *targetNodeName, uint32 targetNodePort)
+{
+	ShardInterval *colocatedShard = NULL;
+	foreach_ptr(colocatedShard, colocatedShardList)
+	{
+		uint64 colocatedShardId = colocatedShard->shardId;
+
+		/*
+		 * To transfer a shard, there should be a healthy placement on the source
+		 * node and no placement on the target node.
+		 */
+		EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
+							   targetNodeName, targetNodePort);
+	}
+}
+
+
 /*
  * EnsureEnoughDiskSpaceForShardMove checks that there is enough space for
  * shard moves of the given colocated shard list from source node to target node.
@@ -619,9 +735,10 @@ IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetNodePort)
 static void
 EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
								  char *sourceNodeName, uint32 sourceNodePort,
-								  char *targetNodeName, uint32 targetNodePort)
+								  char *targetNodeName, uint32 targetNodePort,
+								  ShardTransferType transferType)
 {
-	if (!CheckAvailableSpaceBeforeMove)
+	if (!CheckAvailableSpaceBeforeMove || transferType != SHARD_TRANSFER_MOVE)
 	{
 		return;
 	}
@@ -636,6 +753,34 @@ EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList,
 }
 
 
+/*
+ * TransferAlreadyCompleted returns true if the given shard transfer is already done.
+ * Returns false otherwise.
+ */
+static bool
+TransferAlreadyCompleted(List *colocatedShardList,
						 char *sourceNodeName, uint32 sourceNodePort,
						 char *targetNodeName, uint32 targetNodePort,
						 ShardTransferType transferType)
+{
+	if (transferType == SHARD_TRANSFER_MOVE &&
+		IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
+		!IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
+	{
+		return true;
+	}
+
+	if (transferType == SHARD_TRANSFER_COPY &&
+		IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
+		IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
+	{
+		return true;
+	}
+
+	return false;
+}
+
+
 /*
  * ShardListSizeInBytes returns the size in bytes of a set of shard tables.
 */
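
TransferAlreadyCompleted encodes a small truth table: a move is done when the shards sit only on the target node, a copy when they sit on both nodes. A self-contained check of that logic, with booleans standing in for the two IsShardListOnNode probes:

    #include <stdbool.h>
    #include <stdio.h>

    typedef enum { MOVE, COPY } TransferKind;

    /* mirrors the decision above; onSource/onTarget stand in for the probes */
    static bool
    transfer_done(TransferKind kind, bool onSource, bool onTarget)
    {
        if (kind == MOVE)
        {
            return onTarget && !onSource;   /* shards fully relocated */
        }
        return onTarget && onSource;        /* copy leaves both placements */
    }

    int
    main(void)
    {
        printf("move, source only -> %d\n", transfer_done(MOVE, true, false));  /* 0 */
        printf("move, target only -> %d\n", transfer_done(MOVE, false, true));  /* 1 */
        printf("copy, both nodes  -> %d\n", transfer_done(COPY, true, true));   /* 1 */
        printf("copy, target only -> %d\n", transfer_done(COPY, false, true));  /* 0 */
        return 0;
    }
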
@ -682,6 +827,49 @@ ShardListSizeInBytes(List *shardList, char *workerNodeName, uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SetupRebalanceMonitorForShardTransfer prepares the parameters and
|
||||||
|
* calls SetupRebalanceMonitor, unless the current transfer is a move
|
||||||
|
* initiated by the rebalancer.
|
||||||
|
* See comments on SetupRebalanceMonitor
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
SetupRebalanceMonitorForShardTransfer(uint64 shardId, Oid distributedTableId,
|
||||||
|
char *sourceNodeName, uint32 sourceNodePort,
|
||||||
|
char *targetNodeName, uint32 targetNodePort,
|
||||||
|
ShardTransferType transferType)
|
||||||
|
{
|
||||||
|
if (transferType == SHARD_TRANSFER_MOVE && IsRebalancerInternalBackend())
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We want to be able to track progress of shard moves using
|
||||||
|
* get_rebalancer_progress. If this move is initiated by the rebalancer,
|
||||||
|
* then the rebalancer call has already set up the shared memory that is
|
||||||
|
* used to do that, so we should return here.
|
||||||
|
* But if citus_move_shard_placement is called directly by the user
|
||||||
|
* (or through any other mechanism), then the shared memory is not
|
||||||
|
* set up yet. In that case we do it here.
|
||||||
|
*/
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
|
||||||
|
WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
|
||||||
|
|
||||||
|
PlacementUpdateEvent *placementUpdateEvent = palloc0(
|
||||||
|
sizeof(PlacementUpdateEvent));
|
||||||
|
placementUpdateEvent->updateType =
|
||||||
|
transferType == SHARD_TRANSFER_COPY ? PLACEMENT_UPDATE_COPY :
|
||||||
|
PLACEMENT_UPDATE_MOVE;
|
||||||
|
placementUpdateEvent->shardId = shardId;
|
||||||
|
placementUpdateEvent->sourceNode = sourceNode;
|
||||||
|
placementUpdateEvent->targetNode = targetNode;
|
||||||
|
SetupRebalanceMonitor(list_make1(placementUpdateEvent), distributedTableId,
|
||||||
|
REBALANCE_PROGRESS_MOVING,
|
||||||
|
PLACEMENT_UPDATE_STATUS_SETTING_UP);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CheckSpaceConstraints checks there is enough space to place the colocation
|
* CheckSpaceConstraints checks there is enough space to place the colocation
|
||||||
* on the node that the connection is connected to.
|
* on the node that the connection is connected to.
|
||||||
|
@ -729,17 +917,19 @@ CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not
|
* ErrorIfTargetNodeIsNotSafeForTransfer throws error if the target node is not
|
||||||
* eligible for moving shards.
|
* eligible for shard transfers.
|
||||||
*/
|
*/
|
||||||
void
|
static void
|
||||||
ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
|
ErrorIfTargetNodeIsNotSafeForTransfer(const char *targetNodeName, int targetNodePort,
|
||||||
|
ShardTransferType transferType)
|
||||||
{
|
{
|
||||||
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
|
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
|
||||||
if (workerNode == NULL)
|
if (workerNode == NULL)
|
||||||
{
|
{
|
||||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
errmsg("Moving shards to a non-existing node is not supported"),
|
errmsg("%s shards to a non-existing node is not supported",
|
||||||
|
ShardTransferTypeNamesContinuous[transferType]),
|
||||||
errhint(
|
errhint(
|
||||||
"Add the target node via SELECT citus_add_node('%s', %d);",
|
"Add the target node via SELECT citus_add_node('%s', %d);",
|
||||||
targetNodeName, targetNodePort)));
|
targetNodeName, targetNodePort)));
|
||||||
|
@ -748,13 +938,14 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
|
||||||
if (!workerNode->isActive)
|
if (!workerNode->isActive)
|
||||||
{
|
{
|
||||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
errmsg("Moving shards to a non-active node is not supported"),
|
errmsg("%s shards to a non-active node is not supported",
|
||||||
|
ShardTransferTypeNamesContinuous[transferType]),
|
||||||
errhint(
|
errhint(
|
||||||
"Activate the target node via SELECT citus_activate_node('%s', %d);",
|
"Activate the target node via SELECT citus_activate_node('%s', %d);",
|
||||||
targetNodeName, targetNodePort)));
|
targetNodeName, targetNodePort)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!workerNode->shouldHaveShards)
|
if (transferType == SHARD_TRANSFER_MOVE && !workerNode->shouldHaveShards)
|
||||||
{
|
{
|
||||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
errmsg("Moving shards to a node that shouldn't have a shard is "
|
errmsg("Moving shards to a node that shouldn't have a shard is "
|
||||||
|
@ -767,8 +958,9 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort)
|
||||||
if (!NodeIsPrimary(workerNode))
|
if (!NodeIsPrimary(workerNode))
|
||||||
{
|
{
|
||||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
errmsg("Moving shards to a secondary (e.g., replica) node is "
|
errmsg("%s shards to a secondary (e.g., replica) node is "
|
||||||
"not supported")));
|
"not supported",
|
||||||
|
ShardTransferTypeNamesContinuous[transferType])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1046,41 +1238,6 @@ ErrorIfTableCannotBeReplicated(Oid relationId)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ErrorIfTargetNodeIsNotSafeToCopyTo throws an error if the target node is not
|
|
||||||
* eligible for copying shards.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName, int targetNodePort)
|
|
||||||
{
|
|
||||||
WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort);
|
|
||||||
if (workerNode == NULL)
|
|
||||||
{
|
|
||||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
||||||
errmsg("Copying shards to a non-existing node is not supported"),
|
|
||||||
errhint(
|
|
||||||
"Add the target node via SELECT citus_add_node('%s', %d);",
|
|
||||||
targetNodeName, targetNodePort)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!workerNode->isActive)
|
|
||||||
{
|
|
||||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
||||||
errmsg("Copying shards to a non-active node is not supported"),
|
|
||||||
errhint(
|
|
||||||
"Activate the target node via SELECT citus_activate_node('%s', %d);",
|
|
||||||
targetNodeName, targetNodePort)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!NodeIsPrimary(workerNode))
|
|
||||||
{
|
|
||||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
||||||
errmsg("Copying shards to a secondary (e.g., replica) node is "
|
|
||||||
"not supported")));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* LookupShardTransferMode maps the oids of citus.shard_transfer_mode enum
|
* LookupShardTransferMode maps the oids of citus.shard_transfer_mode enum
|
||||||
* values to a char.
|
* values to a char.
|
||||||
|
@ -1114,154 +1271,6 @@ LookupShardTransferMode(Oid shardReplicationModeOid)
|
||||||
}
|
}
|
-
-
-/*
- * ReplicateColocatedShardPlacement replicates the given shard and its
- * colocated shards from a source node to target node.
- */
-static void
-ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName,
-								 int32 sourceNodePort, char *targetNodeName,
-								 int32 targetNodePort, char shardReplicationMode)
-{
-	ShardInterval *shardInterval = LoadShardInterval(shardId);
-	Oid distributedTableId = shardInterval->relationId;
-
-	ErrorIfSameNode(sourceNodeName, sourceNodePort,
-					targetNodeName, targetNodePort,
-					"copy");
-
-	ErrorIfTableCannotBeReplicated(shardInterval->relationId);
-	ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort);
-	EnsureNoModificationsHaveBeenDone();
-
-	AcquirePlacementColocationLock(shardInterval->relationId, ExclusiveLock, "copy");
-
-	List *colocatedTableList = ColocatedTableList(distributedTableId);
-	List *colocatedShardList = ColocatedShardIntervalList(shardInterval);
-
-	EnsureTableListOwner(colocatedTableList);
-	EnsureTableListSuitableForReplication(colocatedTableList);
-
-	/*
-	 * We sort shardIntervalList so that lock operations will not cause any
-	 * deadlocks.
-	 */
-	colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById);
-
-	/*
-	 * If there are active placements on both nodes, we assume the copy to already
-	 * be done.
-	 */
-	if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) &&
-		IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort))
-	{
-		ereport(WARNING, (errmsg("shard is already present on node %s:%d",
-								 targetNodeName, targetNodePort),
-						  errdetail("Copy may have already completed.")));
-		return;
-	}
-
-	WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort);
-	WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort);
-
-	Oid relationId = RelationIdForShard(shardId);
-	PlacementUpdateEvent *placementUpdateEvent = palloc0(
-		sizeof(PlacementUpdateEvent));
-	placementUpdateEvent->updateType = PLACEMENT_UPDATE_COPY;
-	placementUpdateEvent->shardId = shardId;
-	placementUpdateEvent->sourceNode = sourceNode;
-	placementUpdateEvent->targetNode = targetNode;
-	SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId,
-						  REBALANCE_PROGRESS_MOVING,
-						  PLACEMENT_UPDATE_STATUS_SETTING_UP);
-
-	UpdatePlacementUpdateStatusForShardIntervalList(
-		colocatedShardList,
-		sourceNodeName,
-		sourceNodePort,
-		PLACEMENT_UPDATE_STATUS_SETTING_UP);
-
-	/*
-	 * At this point of the shard replication, we don't need to block the writes to
-	 * shards when logical replication is used.
-	 */
-	bool useLogicalReplication = CanUseLogicalReplication(distributedTableId,
-														  shardReplicationMode);
-	if (!useLogicalReplication)
-	{
-		BlockWritesToShardList(colocatedShardList);
-	}
-
-	ShardInterval *colocatedShard = NULL;
-	foreach_ptr(colocatedShard, colocatedShardList)
-	{
-		uint64 colocatedShardId = colocatedShard->shardId;
-
-		/*
-		 * For shard copy, there should be healthy placement in source node and no
-		 * placement in the target node.
-		 */
-		EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort,
-							   targetNodeName, targetNodePort);
-	}
-
-	if (shardReplicationMode == TRANSFER_MODE_AUTOMATIC)
-	{
-		VerifyTablesHaveReplicaIdentity(colocatedTableList);
-	}
-
-	if (!IsCitusTableType(distributedTableId, REFERENCE_TABLE))
-	{
-		/*
-		 * When copying a shard to a new node, we should first ensure that reference
-		 * tables are present such that joins work immediately after copying the shard.
-		 * When copying a reference table, we are probably trying to achieve just that.
-		 *
-		 * Since this a long-running operation we do this after the error checks, but
-		 * before taking metadata locks.
-		 */
-		EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode);
-	}
-
-	DropOrphanedResourcesInSeparateTransaction();
-
-	CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort,
-					targetNodeName, targetNodePort, useLogicalReplication,
-					"citus_copy_shard_placement");
-
-	/*
-	 * Finally insert the placements to pg_dist_placement and sync it to the
-	 * metadata workers.
-	 */
-	foreach_ptr(colocatedShard, colocatedShardList)
-	{
-		uint64 colocatedShardId = colocatedShard->shardId;
-		uint32 groupId = GroupForNode(targetNodeName, targetNodePort);
-		uint64 placementId = GetNextPlacementId();
-
-		InsertShardPlacementRow(colocatedShardId, placementId,
-								ShardLength(colocatedShardId),
-								groupId);
-
-		if (ShouldSyncTableMetadata(colocatedShard->relationId))
-		{
-			char *placementCommand = PlacementUpsertCommand(colocatedShardId, placementId,
-															0, groupId);
-
-			SendCommandToWorkersWithMetadata(placementCommand);
-		}
-	}
-
-	UpdatePlacementUpdateStatusForShardIntervalList(
-		colocatedShardList,
-		sourceNodeName,
-		sourceNodePort,
-		PLACEMENT_UPDATE_STATUS_COMPLETED);
-
-	FinalizeCurrentProgressMonitor();
-}
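The function removed above backs the copy path of the citus_copy_shard_placement() UDF (note the operation name string passed to CopyShardTables). For orientation, a minimal usage sketch; the shard id, node names, ports, and the exact argument list are illustrative assumptions rather than taken from this diff:

    -- copy shard 102008 and all of its colocated shards between two workers;
    -- 'block_writes' forces the COPY-based path instead of logical replication
    SELECT citus_copy_shard_placement(
        102008,
        'worker-1', 5432,
        'worker-2', 5432,
        transfer_mode := 'block_writes');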
 
 
 /*
  * EnsureTableListOwner ensures current user owns given tables. Superusers
  * are regarded as owners.
@@ -1278,25 +1287,15 @@ EnsureTableListOwner(List *tableIdList)
 
 
 /*
- * EnsureTableListSuitableForReplication errors out if given tables are not
+ * ErrorIfReplicatingDistributedTableWithFKeys errors out if given tables are not
  * suitable for replication.
  */
 static void
-EnsureTableListSuitableForReplication(List *tableIdList)
+ErrorIfReplicatingDistributedTableWithFKeys(List *tableIdList)
 {
 	Oid tableId = InvalidOid;
 	foreach_oid(tableId, tableIdList)
 	{
-		if (IsForeignTable(tableId))
-		{
-			char *relationName = get_rel_name(tableId);
-			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							errmsg("cannot replicate shard"),
-							errdetail("Table %s is a foreign table. Replicating "
-									  "shards backed by foreign tables is "
-									  "not supported.", relationName)));
-		}
-
 		List *foreignConstraintCommandList =
 			GetReferencingForeignConstaintCommands(tableId);
 
@@ -1318,7 +1317,7 @@ EnsureTableListSuitableForReplication(List *tableIdList)
 static void
 CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort,
 				char *targetNodeName, int32 targetNodePort, bool useLogicalReplication,
-				char *operationName)
+				const char *operationName)
 {
 	if (list_length(shardIntervalList) < 1)
 	{
@@ -53,8 +53,14 @@ worker_copy_table_to_node(PG_FUNCTION_ARGS)
 									   targetNodeId);
 
 	StringInfo selectShardQueryForCopy = makeStringInfo();
+
+	/*
+	 * Even though we COPY(SELECT ...) all the columns, we can't just do
+	 * SELECT * because we must not COPY generated columns.
+	 */
+	const char *columnList = CopyableColumnNamesFromRelationName(relationSchemaName,
+																 relationName);
 	appendStringInfo(selectShardQueryForCopy,
-					 "SELECT * FROM %s;", relationQualifiedName);
+					 "SELECT %s FROM %s;", columnList, relationQualifiedName);
 
 	ParamListInfo params = NULL;
 	ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params,
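The explicit column list matters because PostgreSQL never fills GENERATED columns through COPY: COPY FROM rejects them in a column list, and a stream produced by SELECT * on the source would carry one more column than the target accepts. A small illustration with a hypothetical table:

    CREATE TABLE items (
        id bigint,
        doubled bigint GENERATED ALWAYS AS (id * 2) STORED
    );
    COPY items (id, doubled) FROM STDIN;  -- ERROR: column "doubled" is a generated column
    COPY items (id) FROM STDIN;           -- works; "doubled" is recomputed on the target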
@@ -24,6 +24,7 @@
 #include "distributed/relation_utils.h"
 #include "distributed/version_compat.h"
 #include "distributed/local_executor.h"
+#include "distributed/replication_origin_session_utils.h"
 
 /*
  * LocalCopyBuffer is used in copy callback to return the copied rows.
@@ -73,13 +74,14 @@ static void ShardCopyDestReceiverDestroy(DestReceiver *destReceiver);
 static bool CanUseLocalCopy(uint32_t destinationNodeId);
 static StringInfo ConstructShardCopyStatement(List *destinationShardFullyQualifiedName,
 											  bool
-											  useBinaryFormat);
+											  useBinaryFormat, TupleDesc tupleDesc);
 static void WriteLocalTuple(TupleTableSlot *slot, ShardCopyDestReceiver *copyDest);
 static int ReadFromLocalBufferCallback(void *outBuf, int minRead, int maxRead);
 static void LocalCopyToShard(ShardCopyDestReceiver *copyDest, CopyOutState
 							 localCopyOutState);
 static void ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest);
 
 
 static bool
 CanUseLocalCopy(uint32_t destinationNodeId)
 {
@@ -103,9 +105,16 @@ ConnectToRemoteAndStartCopy(ShardCopyDestReceiver *copyDest)
 									  NULL /* database (current) */);
 	ClaimConnectionExclusively(copyDest->connection);
 
+	RemoteTransactionBeginIfNecessary(copyDest->connection);
+
+	SetupReplicationOriginRemoteSession(copyDest->connection);
+
 	StringInfo copyStatement = ConstructShardCopyStatement(
 		copyDest->destinationShardFullyQualifiedName,
-		copyDest->copyOutState->binary);
+		copyDest->copyOutState->binary,
+		copyDest->tupleDescriptor);
 
 	if (!SendRemoteCommand(copyDest->connection, copyStatement->data))
 	{
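SetupReplicationOriginRemoteSession() presumably stamps the remote session with a replication origin before the COPY begins, so that logical decoding can recognize changes produced by the shard copy itself and filter them out. A rough analogue in plain SQL using PostgreSQL's built-in origin API (the origin name is an illustrative placeholder):

    SELECT pg_replication_origin_create('citus_example_origin');
    SELECT pg_replication_origin_session_setup('citus_example_origin');
    -- rows written in this session now carry the origin, and logical
    -- decoding consumers may skip changes stamped with it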
@@ -184,6 +193,8 @@ ShardCopyDestReceiverReceive(TupleTableSlot *slot, DestReceiver *dest)
 	CopyOutState copyOutState = copyDest->copyOutState;
 	if (copyDest->useLocalCopy)
 	{
+		/* Setup replication origin session for local copy */
+
 		WriteLocalTuple(slot, copyDest);
 		if (copyOutState->fe_msgbuf->len > LocalCopyFlushThresholdByte)
 		{
@@ -259,6 +270,11 @@ ShardCopyDestReceiverStartup(DestReceiver *dest, int operation, TupleDesc
 	copyDest->columnOutputFunctions = ColumnOutputFunctions(inputTupleDescriptor,
 															copyOutState->binary);
 	copyDest->copyOutState = copyOutState;
+	if (copyDest->useLocalCopy)
+	{
+		/* Setup replication origin session for local copy */
+		SetupReplicationOriginLocalSession();
+	}
 }
@@ -317,6 +333,9 @@ ShardCopyDestReceiverShutdown(DestReceiver *dest)
 
 			PQclear(result);
 			ForgetResults(copyDest->connection);
+
+			ResetReplicationOriginRemoteSession(copyDest->connection);
+
 			CloseConnection(copyDest->connection);
 		}
 	}
@@ -329,6 +348,10 @@ static void
 ShardCopyDestReceiverDestroy(DestReceiver *dest)
 {
 	ShardCopyDestReceiver *copyDest = (ShardCopyDestReceiver *) dest;
+	if (copyDest->useLocalCopy)
+	{
+		ResetReplicationOriginLocalSession();
+	}
+
 	if (copyDest->copyOutState)
 	{
@@ -344,21 +367,80 @@ ShardCopyDestReceiverDestroy(DestReceiver *dest)
 }
 
 
+/*
+ * CopyableColumnNamesFromTupleDesc creates and returns a comma-separated
+ * string of column names to be used in the COPY and SELECT statements when
+ * copying a table. The COPY and SELECT statements should filter out GENERATED
+ * columns since COPY fails to handle them. While iterating over the attributes
+ * of the table we also need to skip dropped columns.
+ */
+const char *
+CopyableColumnNamesFromTupleDesc(TupleDesc tupDesc)
+{
+	StringInfo columnList = makeStringInfo();
+	bool firstInList = true;
+
+	for (int i = 0; i < tupDesc->natts; i++)
+	{
+		Form_pg_attribute att = TupleDescAttr(tupDesc, i);
+		if (att->attgenerated || att->attisdropped)
+		{
+			continue;
+		}
+		if (!firstInList)
+		{
+			appendStringInfo(columnList, ",");
+		}
+
+		firstInList = false;
+
+		appendStringInfo(columnList, "%s", quote_identifier(NameStr(att->attname)));
+	}
+
+	return columnList->data;
+}
+
+
+/*
+ * CopyableColumnNamesFromRelationName is a wrapper for
+ * CopyableColumnNamesFromTupleDesc that looks the relation up by name.
+ */
+const char *
+CopyableColumnNamesFromRelationName(const char *schemaName, const char *relationName)
+{
+	Oid namespaceOid = get_namespace_oid(schemaName, true);
+
+	Oid relationId = get_relname_relid(relationName, namespaceOid);
+
+	Relation relation = relation_open(relationId, AccessShareLock);
+
+	TupleDesc tupleDesc = RelationGetDescr(relation);
+
+	const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc);
+
+	relation_close(relation, NoLock);
+
+	return columnList;
+}
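For the hypothetical items table from the earlier example, the helper keeps only the copyable attributes; for instance, with an added plain column:

    -- CREATE TABLE items (id bigint,
    --                     doubled bigint GENERATED ALWAYS AS (id * 2) STORED,
    --                     note text);
    -- CopyableColumnNamesFromTupleDesc() renders:  id,note
    SELECT id,note FROM items_102008;  -- shard name is illustrative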
 /*
  * ConstructShardCopyStatement constructs the text of a COPY statement
  * for copying into a result table
  */
 static StringInfo
 ConstructShardCopyStatement(List *destinationShardFullyQualifiedName, bool
-							useBinaryFormat)
+							useBinaryFormat,
+							TupleDesc tupleDesc)
 {
 	char *destinationShardSchemaName = linitial(destinationShardFullyQualifiedName);
 	char *destinationShardRelationName = lsecond(destinationShardFullyQualifiedName);
 
 	StringInfo command = makeStringInfo();
-	appendStringInfo(command, "COPY %s.%s FROM STDIN",
+
+	const char *columnList = CopyableColumnNamesFromTupleDesc(tupleDesc);
+
+	appendStringInfo(command, "COPY %s.%s (%s) FROM STDIN",
 					 quote_identifier(destinationShardSchemaName), quote_identifier(
-						 destinationShardRelationName));
+						 destinationShardRelationName), columnList);
 
 	if (useBinaryFormat)
 	{
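Combined with the column list, the constructed command for the illustrative table would look roughly like the following; the shard name and the exact binary-format suffix are assumptions, not taken from this diff:

    COPY public.items_102008 (id,note) FROM STDIN WITH (format binary);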
@@ -110,8 +110,13 @@ worker_split_copy(PG_FUNCTION_ARGS)
 													  splitCopyInfoList))));
 
 	StringInfo selectShardQueryForCopy = makeStringInfo();
+	const char *columnList = CopyableColumnNamesFromRelationName(
+		sourceShardToCopySchemaName,
+		sourceShardToCopyName);
+
 	appendStringInfo(selectShardQueryForCopy,
-					 "SELECT * FROM %s;", sourceShardToCopyQualifiedName);
+					 "SELECT %s FROM %s;", columnList,
+					 sourceShardToCopyQualifiedName);
 
 	ParamListInfo params = NULL;
 	ExecuteQueryStringIntoDestReceiver(selectShardQueryForCopy->data, params,
@@ -34,6 +34,7 @@
 #include "distributed/intermediate_results.h"
 #include "distributed/listutils.h"
 #include "distributed/coordinator_protocol.h"
+#include "distributed/merge_planner.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/multi_executor.h"
 #include "distributed/distributed_planner.h"
@@ -68,6 +69,17 @@
 #include "utils/syscache.h"
 
 
+/* RouterPlanType is used to determine the router plan to invoke */
+typedef enum RouterPlanType
+{
+	INSERT_SELECT_INTO_CITUS_TABLE,
+	INSERT_SELECT_INTO_LOCAL_TABLE,
+	DML_QUERY,
+	SELECT_QUERY,
+	MERGE_QUERY,
+	REPLAN_WITH_BOUND_PARAMETERS
+} RouterPlanType;
+
 static List *plannerRestrictionContextList = NIL;
 int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */
 static uint64 NextPlanId = 1;
@@ -75,12 +87,8 @@ static uint64 NextPlanId = 1;
 /* keep track of planner call stack levels */
 int PlannerLevel = 0;
 
-static void ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree,
-												   List *rangeTableList);
-static bool ContainsMergeCommandWalker(Node *node);
 static bool ListContainsDistributedTableRTE(List *rangeTableList,
 											bool *maybeHasForeignDistributedTable);
-static bool IsUpdateOrDelete(Query *query);
 static PlannedStmt * CreateDistributedPlannedStmt(
 	DistributedPlanningContext *planContext);
 static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId,
@@ -132,7 +140,10 @@ static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext
 static RTEListProperties * GetRTEListProperties(List *rangeTableList);
 static List * TranslatedVars(PlannerInfo *root, int relationIndex);
 static void WarnIfListHasForeignDistributedTable(List *rangeTableList);
-static void ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList);
+static RouterPlanType GetRouterPlanType(Query *query,
+										Query *originalQuery,
+										bool hasUnresolvedParams);
 
 
 /* Distributed planner hook */
 PlannedStmt *
@@ -156,7 +167,7 @@ distributed_planner(Query *parse,
 		 * We cannot have merge command for this path as well because
 		 * there cannot be recursively planned merge command.
 		 */
-		Assert(!ContainsMergeCommandWalker((Node *) parse));
+		Assert(!IsMergeQuery(parse));
 
 		needsDistributedPlanning = true;
 	}
@@ -200,12 +211,6 @@ distributed_planner(Query *parse,
 
 	if (!fastPathRouterQuery)
 	{
-		/*
-		 * Fast path queries cannot have merge command, and we
-		 * prevent the remaining here.
-		 */
-		ErrorIfQueryHasUnsupportedMergeCommand(parse, rangeTableList);
-
 		/*
 		 * When there are partitioned tables (not applicable to fast path),
 		 * pretend that they are regular tables to avoid unnecessary work
@@ -304,72 +309,6 @@ distributed_planner(Query *parse,
 }
 
 
-/*
- * ErrorIfQueryHasUnsupportedMergeCommand walks over the query tree and bails out
- * if there is no Merge command (e.g., CMD_MERGE) in the query tree. For merge,
- * looks for all supported combinations, throws an exception if any violations
- * are seen.
- */
-static void
-ErrorIfQueryHasUnsupportedMergeCommand(Query *queryTree, List *rangeTableList)
-{
-	/*
-	 * Postgres currently doesn't support Merge queries inside subqueries and
-	 * ctes, but lets be defensive and do query tree walk anyway.
-	 *
-	 * We do not call this path for fast-path queries to avoid this additional
-	 * overhead.
-	 */
-	if (!ContainsMergeCommandWalker((Node *) queryTree))
-	{
-		/* No MERGE found */
-		return;
-	}
-
-
-	/*
-	 * In Citus we have limited support for MERGE, it's allowed
-	 * only if all the tables(target, source or any CTE) tables
-	 * are are local i.e. a combination of Citus local and Non-Citus
-	 * tables (regular Postgres tables).
-	 */
-	ErrorIfMergeHasUnsupportedTables(queryTree, rangeTableList);
-}
-
-
-/*
- * ContainsMergeCommandWalker walks over the node and finds if there are any
- * Merge command (e.g., CMD_MERGE) in the node.
- */
-static bool
-ContainsMergeCommandWalker(Node *node)
-{
-	#if PG_VERSION_NUM < PG_VERSION_15
-	return false;
-	#endif
-
-	if (node == NULL)
-	{
-		return false;
-	}
-
-	if (IsA(node, Query))
-	{
-		Query *query = (Query *) node;
-		if (IsMergeQuery(query))
-		{
-			return true;
-		}
-
-		return query_tree_walker((Query *) node, ContainsMergeCommandWalker, NULL, 0);
-	}
-
-	return expression_tree_walker(node, ContainsMergeCommandWalker, NULL);
-
-	return false;
-}
-
-
 /*
  * ExtractRangeTableEntryList is a wrapper around ExtractRangeTableEntryWalker.
  * The function traverses the input query and returns all the range table
@@ -669,17 +608,6 @@ IsMultiTaskPlan(DistributedPlan *distributedPlan)
 }
 
 
-/*
- * IsUpdateOrDelete returns true if the query performs an update or delete.
- */
-bool
-IsUpdateOrDelete(Query *query)
-{
-	return query->commandType == CMD_UPDATE ||
-		   query->commandType == CMD_DELETE;
-}
-
-
 /*
  * PlanFastPathDistributedStmt creates a distributed planned statement using
  * the FastPathPlanner.
|
@ -850,7 +778,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
|
||||||
* if it is planned as a multi shard modify query.
|
* if it is planned as a multi shard modify query.
|
||||||
*/
|
*/
|
||||||
if ((distributedPlan->planningError ||
|
if ((distributedPlan->planningError ||
|
||||||
(IsUpdateOrDelete(planContext->originalQuery) && IsMultiTaskPlan(
|
(UpdateOrDeleteOrMergeQuery(planContext->originalQuery) && IsMultiTaskPlan(
|
||||||
distributedPlan))) &&
|
distributedPlan))) &&
|
||||||
hasUnresolvedParams)
|
hasUnresolvedParams)
|
||||||
{
|
{
|
||||||
|
@@ -955,6 +883,51 @@ TryCreateDistributedPlannedStmt(PlannedStmt *localPlan,
 }
 
 
+/*
+ * GetRouterPlanType checks the parse tree to return appropriate plan type.
+ */
+static RouterPlanType
+GetRouterPlanType(Query *query, Query *originalQuery, bool hasUnresolvedParams)
+{
+	if (!IsModifyCommand(originalQuery))
+	{
+		return SELECT_QUERY;
+	}
+
+	Oid targetRelationId = ModifyQueryResultRelationId(query);
+
+	EnsureModificationsCanRunOnRelation(targetRelationId);
+	EnsurePartitionTableNotReplicated(targetRelationId);
+
+	/* Check the type of modification being done */
+
+	if (InsertSelectIntoCitusTable(originalQuery))
+	{
+		if (hasUnresolvedParams)
+		{
+			return REPLAN_WITH_BOUND_PARAMETERS;
+		}
+		return INSERT_SELECT_INTO_CITUS_TABLE;
+	}
+	else if (InsertSelectIntoLocalTable(originalQuery))
+	{
+		if (hasUnresolvedParams)
+		{
+			return REPLAN_WITH_BOUND_PARAMETERS;
+		}
+		return INSERT_SELECT_INTO_LOCAL_TABLE;
+	}
+	else if (IsMergeQuery(originalQuery))
+	{
+		return MERGE_QUERY;
+	}
+	else
+	{
+		return DML_QUERY;
+	}
+}
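A few illustrative statements and the plan type GetRouterPlanType() would assign them, assuming dist and dist_src are distributed tables and lt is a plain Postgres table:

    SELECT * FROM dist;                        -- SELECT_QUERY
    UPDATE dist SET val = 1 WHERE key = 7;     -- DML_QUERY
    INSERT INTO dist SELECT * FROM dist_src;   -- INSERT_SELECT_INTO_CITUS_TABLE
    INSERT INTO lt SELECT * FROM dist;         -- INSERT_SELECT_INTO_LOCAL_TABLE
    MERGE INTO dist t
    USING dist_src s ON (t.key = s.key)
    WHEN MATCHED THEN UPDATE SET val = s.val
    WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.val);  -- MERGE_QUERY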
 /*
  * CreateDistributedPlan generates a distributed plan for a query.
  * It goes through 3 steps:
@@ -972,51 +945,71 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
 	DistributedPlan *distributedPlan = NULL;
 	bool hasCtes = originalQuery->cteList != NIL;
 
-	if (IsModifyCommand(originalQuery))
+	/* Step 1: Try router planner */
+
+	RouterPlanType routerPlan = GetRouterPlanType(query, originalQuery,
+												  hasUnresolvedParams);
+
+	switch (routerPlan)
 	{
-		Oid targetRelationId = ModifyQueryResultRelationId(query);
-
-		EnsureModificationsCanRunOnRelation(targetRelationId);
-
-		EnsurePartitionTableNotReplicated(targetRelationId);
-
-		if (InsertSelectIntoCitusTable(originalQuery))
+		case INSERT_SELECT_INTO_CITUS_TABLE:
 		{
-			if (hasUnresolvedParams)
-			{
-				/*
-				 * Unresolved parameters can cause performance regressions in
-				 * INSERT...SELECT when the partition column is a parameter
-				 * because we don't perform any additional pruning in the executor.
-				 */
-				return NULL;
-			}
-
 			distributedPlan =
-				CreateInsertSelectPlan(planId, originalQuery, plannerRestrictionContext,
+				CreateInsertSelectPlan(planId,
+									   originalQuery,
+									   plannerRestrictionContext,
 									   boundParams);
+			break;
 		}
-		else if (InsertSelectIntoLocalTable(originalQuery))
+
+		case INSERT_SELECT_INTO_LOCAL_TABLE:
 		{
-			if (hasUnresolvedParams)
-			{
-				/*
-				 * Unresolved parameters can cause performance regressions in
-				 * INSERT...SELECT when the partition column is a parameter
-				 * because we don't perform any additional pruning in the executor.
-				 */
-				return NULL;
-			}
 			distributedPlan =
-				CreateInsertSelectIntoLocalTablePlan(planId, originalQuery, boundParams,
+				CreateInsertSelectIntoLocalTablePlan(planId,
+													 originalQuery,
+													 boundParams,
 													 hasUnresolvedParams,
 													 plannerRestrictionContext);
+			break;
 		}
-		else
+
+		case DML_QUERY:
 		{
 			/* modifications are always routed through the same planner/executor */
 			distributedPlan =
 				CreateModifyPlan(originalQuery, query, plannerRestrictionContext);
+			break;
+		}
+
+		case MERGE_QUERY:
+		{
+			distributedPlan =
+				CreateMergePlan(originalQuery, query, plannerRestrictionContext);
+			break;
+		}
+
+		case REPLAN_WITH_BOUND_PARAMETERS:
+		{
+			/*
+			 * Unresolved parameters can cause performance regressions in
+			 * INSERT...SELECT when the partition column is a parameter
+			 * because we don't perform any additional pruning in the executor.
+			 */
+			return NULL;
+		}
+
+		case SELECT_QUERY:
+		{
+			/*
+			 * For SELECT queries, if the router executor is enabled, first try
+			 * to plan the query as a router query. If that is not supported,
+			 * fall back to the full-blown plan/optimize/physical planning
+			 * process needed to produce distributed query plans.
+			 */
+			distributedPlan =
+				CreateRouterPlan(originalQuery, query, plannerRestrictionContext);
+			break;
+		}
 	}
 
 	/* the functions above always return a plan, possibly with an error */
@@ -1030,31 +1023,6 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
 		{
 			RaiseDeferredError(distributedPlan->planningError, DEBUG2);
 		}
-	}
-	else
-	{
-		/*
-		 * For select queries we, if router executor is enabled, first try to
-		 * plan the query as a router query. If not supported, otherwise try
-		 * the full blown plan/optimize/physical planning process needed to
-		 * produce distributed query plans.
-		 */
-
-		distributedPlan = CreateRouterPlan(originalQuery, query,
-										   plannerRestrictionContext);
-		if (distributedPlan->planningError == NULL)
-		{
-			return distributedPlan;
-		}
-		else
-		{
-			/*
-			 * For debugging it's useful to display why query was not
-			 * router plannable.
-			 */
-			RaiseDeferredError(distributedPlan->planningError, DEBUG2);
-		}
-	}
-
 	if (hasUnresolvedParams)
 	{
@@ -1082,6 +1050,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
 											boundParams);
 	Assert(originalQuery != NULL);
 
+	/* Step 2: Generate subplans for CTEs and complex subqueries */
+
 	/*
 	 * Plan subqueries and CTEs that cannot be pushed down by recursively
 	 * calling the planner and return the resulting plans to subPlanList.
@@ -1182,6 +1152,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
 	query->cteList = NIL;
 	Assert(originalQuery->cteList == NIL);
 
+	/* Step 3: Try Logical planner */
+
 	MultiTreeRoot *logicalPlan = MultiLogicalPlanCreate(originalQuery, query,
 														plannerRestrictionContext);
 	MultiLogicalPlanOptimize(logicalPlan);
@@ -2611,148 +2583,3 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
 		}
 	}
 }
-
-
-/*
- * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is
- * permitted on special relations, such as materialized view, returns true only if
- * it's a "source" relation.
- */
-bool
-IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte)
-{
-	if (!IsMergeQuery(parse))
-	{
-		return false;
-	}
-
-	RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable);
-
-	/* Is it a target relation? */
-	if (targetRte->relid == rte->relid)
-	{
-		return false;
-	}
-
-	return true;
-}
-
-
-/*
- * ErrorIfMergeHasUnsupportedTables checks if all the tables(target, source or any CTE
- * present) in the MERGE command are local i.e. a combination of Citus local and Non-Citus
- * tables (regular Postgres tables), raises an exception for all other combinations.
- */
-static void
-ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList)
-{
-	ListCell *tableCell = NULL;
-
-	foreach(tableCell, rangeTableList)
-	{
-		RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(tableCell);
-		Oid relationId = rangeTableEntry->relid;
-
-		switch (rangeTableEntry->rtekind)
-		{
-			case RTE_RELATION:
-			{
-				/* Check the relation type */
-				break;
-			}
-
-			case RTE_SUBQUERY:
-			case RTE_FUNCTION:
-			case RTE_TABLEFUNC:
-			case RTE_VALUES:
-			case RTE_JOIN:
-			case RTE_CTE:
-			{
-				/* Skip them as base table(s) will be checked */
-				continue;
-			}
-
-			/*
-			 * RTE_NAMEDTUPLESTORE is typically used in ephmeral named relations,
-			 * such as, trigger data; until we find a genuine use case, raise an
-			 * exception.
-			 * RTE_RESULT is a node added by the planner and we shouldn't
-			 * encounter it in the parse tree.
-			 */
-			case RTE_NAMEDTUPLESTORE:
-			case RTE_RESULT:
-			{
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("MERGE command is not supported with "
-								"Tuplestores and results")));
-				break;
-			}
-
-			default:
-			{
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("MERGE command: Unrecognized range table entry.")));
-			}
-		}
-
-		/* RTE Relation can be of various types, check them now */
-
-		/* skip the regular views as they are replaced with subqueries */
-		if (rangeTableEntry->relkind == RELKIND_VIEW)
-		{
-			continue;
-		}
-
-		if (rangeTableEntry->relkind == RELKIND_MATVIEW ||
-			rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE)
-		{
-			/* Materialized view or Foreign table as target is not allowed */
-			if (IsMergeAllowedOnRelation(parse, rangeTableEntry))
-			{
-				/* Non target relation is ok */
-				continue;
-			}
-			else
-			{
-				ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-								errmsg("MERGE command is not allowed "
-									   "on materialized view")));
-			}
-		}
-
-		if (rangeTableEntry->relkind != RELKIND_RELATION &&
-			rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE)
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("Unexpected relation type(relkind:%c) in MERGE command",
-							rangeTableEntry->relkind)));
-		}
-
-		Assert(rangeTableEntry->relid != 0);
-
-		/* Distributed tables and Reference tables are not supported yet */
-		if (IsCitusTableType(relationId, REFERENCE_TABLE) ||
-			IsCitusTableType(relationId, DISTRIBUTED_TABLE))
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("MERGE command is not supported on "
-							"distributed/reference tables yet")));
-		}
-
-		/* Regular Postgres tables and Citus local tables are allowed */
-		if (!IsCitusTable(relationId) ||
-			IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
-		{
-			continue;
-		}
-
-		/* Any other Citus table type missing ? */
-	}
-
-	/* All the tables are local, supported */
-}
@@ -54,10 +54,11 @@
 bool EnableFastPathRouterPlanner = true;
 
 static bool ColumnAppearsMultipleTimes(Node *quals, Var *distributionKey);
-static bool ConjunctionContainsColumnFilter(Node *node, Var *column,
-											Node **distributionKeyValue);
 static bool DistKeyInSimpleOpExpression(Expr *clause, Var *distColumn,
 										Node **distributionKeyValue);
+static bool ConjunctionContainsColumnFilter(Node *node,
+											Var *column,
+											Node **distributionKeyValue);
 
 
 /*
@@ -875,7 +875,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery,
 												   &prunedShardIntervalListList,
 												   replacePrunedQueryWithDummy,
 												   &multiShardModifyQuery, NULL,
-												   false);
+												   NULL);
 
 	Assert(!multiShardModifyQuery);
 
@@ -938,6 +938,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery,
 	modifyTask->taskPlacementList = insertShardPlacementList;
 	modifyTask->relationShardList = relationShardList;
 	modifyTask->replicationModel = targetTableCacheEntry->replicationModel;
+	modifyTask->isLocalTableModification = false;
 
 	return modifyTask;
 }
@@ -0,0 +1,738 @@
+/*-------------------------------------------------------------------------
+ *
+ * merge_planner.c
+ *
+ * This file contains functions to help plan MERGE queries.
+ *
+ * Copyright (c) Citus Data, Inc.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include <stddef.h>
+
+#include "postgres.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/optimizer.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+
+#include "distributed/citus_clauses.h"
+#include "distributed/listutils.h"
+#include "distributed/merge_planner.h"
+#include "distributed/multi_logical_optimizer.h"
+#include "distributed/multi_router_planner.h"
+#include "distributed/pg_version_constants.h"
+#include "distributed/query_pushdown_planning.h"
+
+#if PG_VERSION_NUM >= PG_VERSION_15
+
+static DeferredErrorMessage * CheckIfRTETypeIsUnsupported(Query *parse,
+														  RangeTblEntry *rangeTableEntry);
+static DeferredErrorMessage * ErrorIfDistTablesNotColocated(Query *parse,
+															List *distTablesList,
+															PlannerRestrictionContext *
+															plannerRestrictionContext);
+static DeferredErrorMessage * ErrorIfMergeHasUnsupportedTables(Query *parse,
+															   List *rangeTableList,
+															   PlannerRestrictionContext *
+															   restrictionContext);
+static bool IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query,
+											  bool skipOuterVars);
+static DeferredErrorMessage * InsertDistributionColumnMatchesSource(Query *query,
+																	RangeTblEntry *
+																	resultRte);
+static DeferredErrorMessage * MergeQualAndTargetListFunctionsSupported(
+	Oid resultRelationId,
+	FromExpr *joinTree,
+	Node *quals,
+	List *targetList,
+	CmdType commandType);
+#endif
+
+
+/*
+ * CreateMergePlan attempts to create a plan for the given MERGE SQL
+ * statement. If planning fails ->planningError is set to a description
+ * of the failure.
+ */
+DistributedPlan *
+CreateMergePlan(Query *originalQuery, Query *query,
+				PlannerRestrictionContext *plannerRestrictionContext)
+{
+	DistributedPlan *distributedPlan = CitusMakeNode(DistributedPlan);
+	bool multiShardQuery = false;
+
+	Assert(originalQuery->commandType == CMD_MERGE);
+
+	distributedPlan->modLevel = RowModifyLevelForQuery(query);
+
+	distributedPlan->planningError = MergeQuerySupported(originalQuery,
+														 multiShardQuery,
+														 plannerRestrictionContext);
+
+	if (distributedPlan->planningError != NULL)
+	{
+		return distributedPlan;
+	}
+
+	Job *job = RouterJob(originalQuery, plannerRestrictionContext,
+						 &distributedPlan->planningError);
+
+	if (distributedPlan->planningError != NULL)
+	{
+		return distributedPlan;
+	}
+
+	ereport(DEBUG1, (errmsg("Creating MERGE router plan")));
+
+	distributedPlan->workerJob = job;
+	distributedPlan->combineQuery = NULL;
+
+	/* MERGE doesn't support RETURNING clause */
+	distributedPlan->expectResults = false;
+	distributedPlan->targetRelationId = ResultRelationOidForQuery(query);
+
+	distributedPlan->fastPathRouterPlan =
+		plannerRestrictionContext->fastPathRestrictionContext->fastPathRouterQuery;
+
+	return distributedPlan;
+}
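A statement that would reach CreateMergePlan() on PostgreSQL 15+, assuming orders and order_updates are hash-distributed and colocated (names are illustrative):

    MERGE INTO orders t
    USING order_updates s ON (t.order_id = s.order_id)
    WHEN MATCHED THEN UPDATE SET status = s.status
    WHEN NOT MATCHED THEN INSERT (order_id, status) VALUES (s.order_id, s.status);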
+
+
+/*
+ * MergeQuerySupported checks whether the query contains a MERGE command and,
+ * if it finds one, verifies the criteria below:
+ * - Supported tables and combinations in ErrorIfMergeHasUnsupportedTables
+ * - Distributed tables requirements in ErrorIfDistTablesNotColocated
+ * - Target lists and functions-in-quals in MergeQualAndTargetListFunctionsSupported
+ */
+DeferredErrorMessage *
+MergeQuerySupported(Query *originalQuery, bool multiShardQuery,
+					PlannerRestrictionContext *plannerRestrictionContext)
+{
+	/* function is void for pre-15 versions of Postgres */
+#if PG_VERSION_NUM < PG_VERSION_15
+
+	return NULL;
+
+#else
+
+	/*
+	 * TODO: For now, we are adding an exception where any volatile or stable
+	 * functions are not allowed in the MERGE query, but this will become too
+	 * restrictive as this will prevent many useful and simple cases, such as
+	 * INSERT VALUES(ts::timestamp), bigserial column inserts etc. But without
+	 * this restriction, we have a potential danger of some of the function(s)
+	 * getting executed at the worker, which will result in incorrect behavior.
+	 */
+	if (contain_mutable_functions((Node *) originalQuery))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "non-IMMUTABLE functions are not yet supported "
+							 "in MERGE sql with distributed tables",
+							 NULL, NULL);
+	}
+
+	List *rangeTableList = ExtractRangeTableEntryList(originalQuery);
+	RangeTblEntry *resultRte = ExtractResultRelationRTE(originalQuery);
+
+	/*
+	 * Fast path queries cannot have merge command, and we prevent the remaining here.
+	 * In Citus we have limited support for MERGE; it's allowed only if all
+	 * the tables (target, source or any CTE) are local, i.e. a combination of
+	 * Citus local and non-Citus tables (regular Postgres tables), or
+	 * distributed tables with some restrictions; please see the header of
+	 * routine ErrorIfDistTablesNotColocated for details.
+	 */
+	DeferredErrorMessage *deferredError =
+		ErrorIfMergeHasUnsupportedTables(originalQuery,
+										 rangeTableList,
+										 plannerRestrictionContext);
+	if (deferredError)
+	{
+		/* MERGE's unsupported combination, raise the exception */
+		RaiseDeferredError(deferredError, ERROR);
+	}
+
+	Oid resultRelationId = resultRte->relid;
+	deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId,
+															 originalQuery->jointree,
+															 originalQuery->jointree->
+															 quals,
+															 originalQuery->targetList,
+															 originalQuery->commandType);
+	if (deferredError)
+	{
+		return deferredError;
+	}
+
+	/*
+	 * MERGE is a special case where we have multiple modify statements
+	 * within itself. Check each INSERT/UPDATE/DELETE individually.
+	 */
+	MergeAction *action = NULL;
+	foreach_ptr(action, originalQuery->mergeActionList)
+	{
+		Assert(originalQuery->returningList == NULL);
+		deferredError = MergeQualAndTargetListFunctionsSupported(resultRelationId,
+																 originalQuery->jointree,
+																 action->qual,
+																 action->targetList,
+																 action->commandType);
+		if (deferredError)
+		{
+			/* MERGE's unsupported scenario, raise the exception */
+			RaiseDeferredError(deferredError, ERROR);
+		}
+	}
+
+	deferredError =
+		InsertDistributionColumnMatchesSource(originalQuery, resultRte);
+	if (deferredError)
+	{
+		/* MERGE's unsupported scenario, raise the exception */
+		RaiseDeferredError(deferredError, ERROR);
+	}
+
+	if (multiShardQuery)
+	{
+		deferredError =
+			DeferErrorIfUnsupportedSubqueryPushdown(originalQuery,
+													plannerRestrictionContext);
+		if (deferredError)
+		{
+			return deferredError;
+		}
+	}
+
+	if (HasDangerousJoinUsing(originalQuery->rtable, (Node *) originalQuery->jointree))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "a join with USING causes an internal naming "
+							 "conflict, use ON instead", NULL, NULL);
+	}
+
+	return NULL;
+
+#endif
+}
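One consequence of the mutable-function check above, with the same illustrative tables: now() is STABLE rather than IMMUTABLE, so contain_mutable_functions() flags it and planning is refused:

    MERGE INTO orders t
    USING order_updates s ON (t.order_id = s.order_id)
    WHEN NOT MATCHED THEN
        INSERT (order_id, created_at) VALUES (s.order_id, now());
    -- rejected: non-IMMUTABLE functions are not yet supported in MERGE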
+
+
+/*
+ * IsMergeAllowedOnRelation takes a relation entry and checks if MERGE command is
+ * permitted on special relations, such as materialized view; returns true only if
+ * it's a "source" relation.
+ */
+bool
+IsMergeAllowedOnRelation(Query *parse, RangeTblEntry *rte)
+{
+	if (!IsMergeQuery(parse))
+	{
+		return false;
+	}
+
+	/* Fetch the MERGE target relation */
+	RangeTblEntry *targetRte = rt_fetch(parse->resultRelation, parse->rtable);
+
+	/* Is it a target relation? */
+	if (targetRte->relid == rte->relid)
+	{
+		return false;
+	}
+
+	return true;
+}
+
+
+#if PG_VERSION_NUM >= PG_VERSION_15
+
+/*
+ * ErrorIfDistTablesNotColocated checks to see if
+ *
+ * - There are a minimum of two distributed tables (source and a target).
+ * - All the distributed tables are indeed colocated.
+ *
+ * If any of the conditions are not met, it raises an exception.
+ */
+static DeferredErrorMessage *
+ErrorIfDistTablesNotColocated(Query *parse, List *distTablesList,
+							  PlannerRestrictionContext *plannerRestrictionContext)
+{
+	/* All MERGE tables must be distributed */
+	if (list_length(distTablesList) < 2)
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "For MERGE command, both the source and target "
+							 "must be distributed", NULL, NULL);
+	}
+
+	/* All distributed tables must be colocated */
+	if (!AllDistributedRelationsInRTEListColocated(distTablesList))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "For MERGE command, all the distributed tables "
+							 "must be colocated", NULL, NULL);
+	}
+
+	return NULL;
+}
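Satisfying both conditions above typically means creating the source colocated with the target (names illustrative):

    SELECT create_distributed_table('orders', 'order_id');
    SELECT create_distributed_table('order_updates', 'order_id',
                                    colocate_with => 'orders');
    -- both MERGE relations are distributed and share a colocation group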
+
+
+/*
+ * CheckIfRTETypeIsUnsupported checks for types of tables that are not supported,
+ * such as reference tables, append-distributed tables, and materialized view as
+ * target relation. Routine returns NULL for the supported types, an error
+ * message for everything else.
+ */
+static DeferredErrorMessage *
+CheckIfRTETypeIsUnsupported(Query *parse, RangeTblEntry *rangeTableEntry)
+{
+	if (rangeTableEntry->relkind == RELKIND_MATVIEW ||
+		rangeTableEntry->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		/* Materialized view or Foreign table as target is not allowed */
+		if (IsMergeAllowedOnRelation(parse, rangeTableEntry))
+		{
+			/* Non target relation is ok */
+			return NULL;
+		}
+		else
+		{
+			/* Usually we don't reach this exception as the Postgres parser catches it */
+			StringInfo errorMessage = makeStringInfo();
+			appendStringInfo(errorMessage, "MERGE command is not allowed on "
+										   "relation type(relkind:%c)",
+							 rangeTableEntry->relkind);
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+								 errorMessage->data, NULL, NULL);
+		}
+	}
+
+	if (rangeTableEntry->relkind != RELKIND_RELATION &&
+		rangeTableEntry->relkind != RELKIND_PARTITIONED_TABLE)
+	{
+		StringInfo errorMessage = makeStringInfo();
+		appendStringInfo(errorMessage, "Unexpected table type(relkind:%c) "
+									   "in MERGE command", rangeTableEntry->relkind);
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 errorMessage->data, NULL, NULL);
+	}
+
+	Assert(rangeTableEntry->relid != 0);
+
+	/* Reference tables are not supported yet */
+	if (IsCitusTableType(rangeTableEntry->relid, REFERENCE_TABLE))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "MERGE command is not supported on reference "
+							 "tables yet", NULL, NULL);
+	}
+
+	/* Append/Range tables are not supported */
+	if (IsCitusTableType(rangeTableEntry->relid, APPEND_DISTRIBUTED) ||
+		IsCitusTableType(rangeTableEntry->relid, RANGE_DISTRIBUTED))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "For MERGE command, all the distributed tables "
+							 "must be colocated, for append/range distribution, "
+							 "colocation is not supported", NULL,
+							 "Consider using hash distribution instead");
+	}
+
+	return NULL;
+}
+
+
+/*
+ * ErrorIfMergeHasUnsupportedTables checks if all the tables (target, source or
+ * any CTE present) in the MERGE command are local, i.e. a combination of Citus
+ * local and non-Citus tables (regular Postgres tables), or distributed tables
+ * with some restrictions; please see the header of routine
+ * ErrorIfDistTablesNotColocated for details. It raises an exception for all
+ * other combinations.
+ */
+static DeferredErrorMessage *
+ErrorIfMergeHasUnsupportedTables(Query *parse, List *rangeTableList,
+								 PlannerRestrictionContext *restrictionContext)
+{
+	List *distTablesList = NIL;
+	bool foundLocalTables = false;
+
+	RangeTblEntry *rangeTableEntry = NULL;
+	foreach_ptr(rangeTableEntry, rangeTableList)
+	{
+		Oid relationId = rangeTableEntry->relid;
+
+		switch (rangeTableEntry->rtekind)
+		{
+			case RTE_RELATION:
+			{
+				/* Check the relation type */
+				break;
+			}
+
+			case RTE_SUBQUERY:
+			case RTE_FUNCTION:
+			case RTE_TABLEFUNC:
+			case RTE_VALUES:
+			case RTE_JOIN:
+			case RTE_CTE:
+			{
+				/* Skip them as base table(s) will be checked */
+				continue;
+			}
+
+			/*
+			 * RTE_NAMEDTUPLESTORE is typically used in ephemeral named relations,
+			 * such as trigger data; until we find a genuine use case, raise an
+			 * exception.
+			 * RTE_RESULT is a node added by the planner and we shouldn't
+			 * encounter it in the parse tree.
+			 */
+			case RTE_NAMEDTUPLESTORE:
+			case RTE_RESULT:
+			{
+				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+									 "MERGE command is not supported with "
+									 "Tuplestores and results",
+									 NULL, NULL);
+			}
+
+			default:
+			{
+				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+									 "MERGE command: Unrecognized range table entry.",
+									 NULL, NULL);
+			}
+		}
+
+		/* RTE Relation can be of various types, check them now */
+
+		/* skip the regular views as they are replaced with subqueries */
+		if (rangeTableEntry->relkind == RELKIND_VIEW)
+		{
+			continue;
+		}
+
+		DeferredErrorMessage *errorMessage =
+			CheckIfRTETypeIsUnsupported(parse, rangeTableEntry);
+		if (errorMessage)
+		{
+			return errorMessage;
+		}
+
+		/*
+		 * For now, save all distributed tables, later (below) we will
+		 * check for supported combination(s).
+		 */
+		if (IsCitusTableType(relationId, DISTRIBUTED_TABLE))
+		{
+			distTablesList = lappend(distTablesList, rangeTableEntry);
+			continue;
+		}
+
+		/* Regular Postgres tables and Citus local tables are allowed */
+		if (!IsCitusTable(relationId) ||
+			IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
+		{
+			foundLocalTables = true;
+			continue;
+		}
+
+		/* Any other Citus table type missing ? */
+	}
+
+	/* Ensure all tables are indeed local */
+	if (foundLocalTables && list_length(distTablesList) == 0)
+	{
+		/* All the tables are local, supported */
+		return NULL;
+	}
+	else if (foundLocalTables && list_length(distTablesList) > 0)
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "MERGE command is not supported with "
+							 "combination of distributed/local tables yet",
+							 NULL, NULL);
+	}
+
+	/* Ensure all distributed tables are indeed co-located */
+	return ErrorIfDistTablesNotColocated(parse,
+										 distTablesList,
+										 restrictionContext);
+}
|
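Since these checks return a DeferredErrorMessage rather than throwing immediately, the caller decides when the failure becomes fatal. A minimal sketch of the typical calling pattern (variable names hypothetical; RaiseDeferredError is the existing Citus helper for surfacing a deferred message):

	DeferredErrorMessage *mergeError =
		ErrorIfMergeHasUnsupportedTables(parse, rangeTableList, restrictionContext);
	if (mergeError != NULL)
	{
		/* turn the deferred message into a regular PostgreSQL ERROR */
		RaiseDeferredError(mergeError, ERROR);
	}
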
/*
 * IsDistributionColumnInMergeSource returns true if the given column is a
 * distribution column. The function uses FindReferencedTableColumn to find the
 * original relation id and column that the column expression refers to. It then
 * checks whether that column is a distribution column of the relation.
 *
 * Also, the function always returns false for reference tables, given that
 * reference tables do not have a distribution column.
 *
 * If skipOuterVars is true, then it doesn't process the outer vars.
 */
bool
IsDistributionColumnInMergeSource(Expr *columnExpression, Query *query, bool
								  skipOuterVars)
{
	bool isDistributionColumn = false;
	Var *column = NULL;
	RangeTblEntry *relationRTE = NULL;

	/* ParentQueryList is same as the original query for MERGE */
	FindReferencedTableColumn(columnExpression, list_make1(query), query, &column,
							  &relationRTE,
							  skipOuterVars);
	Oid relationId = relationRTE ? relationRTE->relid : InvalidOid;
	if (relationId != InvalidOid && column != NULL)
	{
		Var *distributionColumn = DistPartitionKey(relationId);

		/* not all distributed tables have a distribution column */
		if (distributionColumn != NULL && column->varattno ==
			distributionColumn->varattno)
		{
			isDistributionColumn = true;
		}
	}

	return isDistributionColumn;
}

/*
 * InsertDistributionColumnMatchesSource checks whether MERGE is inserting a
 * value into the target which is not from the source table; if so, it
 * returns a deferred error.
 * Note: Inserting values other than the joined column values will result in
 * rows ending up in incorrect shards; to prevent such mishaps, we disallow
 * such inserts here.
 */
static DeferredErrorMessage *
InsertDistributionColumnMatchesSource(Query *query, RangeTblEntry *resultRte)
{
	Assert(IsMergeQuery(query));

	if (!IsCitusTableType(resultRte->relid, DISTRIBUTED_TABLE))
	{
		return NULL;
	}

	bool foundDistributionColumn = false;
	MergeAction *action = NULL;
	foreach_ptr(action, query->mergeActionList)
	{
		/* Skip the MATCHED clause as INSERTs are not allowed in it */
		if (action->matched)
		{
			continue;
		}

		/* NOT MATCHED can have either INSERT or DO NOTHING */
		if (action->commandType == CMD_NOTHING)
		{
			return NULL;
		}

		if (action->targetList == NIL)
		{
			/* INSERT DEFAULT VALUES is not allowed */
			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
								 "cannot perform MERGE INSERT with DEFAULTS",
								 NULL, NULL);
		}

		Assert(action->commandType == CMD_INSERT);
		Var *targetKey = PartitionColumn(resultRte->relid, 1);

		TargetEntry *targetEntry = NULL;
		foreach_ptr(targetEntry, action->targetList)
		{
			AttrNumber originalAttrNo = targetEntry->resno;

			/* skip processing of target table non-partition columns */
			if (originalAttrNo != targetKey->varattno)
			{
				continue;
			}

			foundDistributionColumn = true;

			if (IsA(targetEntry->expr, Var))
			{
				if (IsDistributionColumnInMergeSource(targetEntry->expr, query, true))
				{
					return NULL;
				}
				else
				{
					return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
										 "MERGE INSERT must use the source table "
										 "distribution column value",
										 NULL, NULL);
				}
			}
			else
			{
				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
									 "MERGE INSERT must refer to a source column "
									 "for the distribution column",
									 NULL, NULL);
			}
		}

		if (!foundDistributionColumn)
		{
			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
								 "MERGE INSERT must have distribution column as value",
								 NULL, NULL);
		}
	}

	return NULL;
}

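To make the restriction concrete, here is an illustration written as a comment; the schema is hypothetical (two hash-distributed, co-located tables target and source, distributed on key):

	/*
	 * Accepted: the inserted distribution column value comes straight from
	 * the source relation, so the row lands in the shard matching the join key.
	 *
	 *   MERGE INTO target t USING source s ON (t.key = s.key)
	 *   WHEN NOT MATCHED THEN INSERT (key, val) VALUES (s.key, s.val);
	 *
	 * Rejected by InsertDistributionColumnMatchesSource: a constant (or any
	 * non-source expression) could route the row to a shard that does not
	 * correspond to the join key.
	 *
	 *   ... WHEN NOT MATCHED THEN INSERT (key, val) VALUES (42, s.val);
	 */
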
/*
 * MergeQualAndTargetListFunctionsSupported checks the WHEN/ON clause actions to
 * see what functions are allowed, whether we are updating the distribution
 * column, etc.
 */
static DeferredErrorMessage *
MergeQualAndTargetListFunctionsSupported(Oid resultRelationId, FromExpr *joinTree,
										 Node *quals,
										 List *targetList, CmdType commandType)
{
	uint32 rangeTableId = 1;
	Var *distributionColumn = NULL;
	if (IsCitusTable(resultRelationId) && HasDistributionKey(resultRelationId))
	{
		distributionColumn = PartitionColumn(resultRelationId, rangeTableId);
	}

	ListCell *targetEntryCell = NULL;
	bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
	bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
	foreach(targetEntryCell, targetList)
	{
		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);

		bool targetEntryDistributionColumn = false;
		AttrNumber targetColumnAttrNumber = InvalidAttrNumber;

		if (distributionColumn)
		{
			if (commandType == CMD_UPDATE)
			{
				/*
				 * Note that it is not possible to give an alias to
				 * UPDATE table SET ...
				 */
				if (targetEntry->resname)
				{
					targetColumnAttrNumber = get_attnum(resultRelationId,
														targetEntry->resname);
					if (targetColumnAttrNumber == distributionColumn->varattno)
					{
						targetEntryDistributionColumn = true;
					}
				}
			}
		}

		if (targetEntryDistributionColumn &&
			TargetEntryChangesValue(targetEntry, distributionColumn, joinTree))
		{
			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
								 "updating the distribution column is not "
								 "allowed in MERGE actions",
								 NULL, NULL);
		}

		if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
										  CitusIsVolatileFunction))
		{
			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
								 "functions used in MERGE actions on distributed "
								 "tables must not be VOLATILE",
								 NULL, NULL);
		}

		if (MasterIrreducibleExpression((Node *) targetEntry->expr,
										&hasVarArgument, &hasBadCoalesce))
		{
			Assert(hasVarArgument || hasBadCoalesce);
		}

		if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
										  NodeIsFieldStore))
		{
			/* DELETE cannot do field indirection already */
			Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
								 "inserting or modifying composite type fields is not "
								 "supported", NULL,
								 "Use the column name to insert or update the composite "
								 "type as a single value");
		}
	}


	/*
	 * Check the condition; convert the list of expressions into an expression
	 * tree for further processing
	 */
	if (quals)
	{
		if (IsA(quals, List))
		{
			quals = (Node *) make_ands_explicit((List *) quals);
		}

		if (FindNodeMatchingCheckFunction((Node *) quals, CitusIsVolatileFunction))
		{
			StringInfo errorMessage = makeStringInfo();
			appendStringInfo(errorMessage, "functions used in the %s clause of MERGE "
										   "queries on distributed tables must not be VOLATILE",
							 (commandType == CMD_MERGE) ? "ON" : "WHEN");
			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
								 errorMessage->data, NULL, NULL);
		}
		else if (MasterIrreducibleExpression(quals, &hasVarArgument, &hasBadCoalesce))
		{
			Assert(hasVarArgument || hasBadCoalesce);
		}
	}

	if (hasVarArgument)
	{
		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
							 "STABLE functions used in MERGE queries "
							 "cannot be called with column references",
							 NULL, NULL);
	}

	if (hasBadCoalesce)
	{
		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
							 "non-IMMUTABLE functions are not allowed in CASE or "
							 "COALESCE statements",
							 NULL, NULL);
	}

	if (quals != NULL && nodeTag(quals) == T_CurrentOfExpr)
	{
		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
							 "cannot run MERGE actions with cursors",
							 NULL, NULL);
	}

	return NULL;
}


#endif

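The VOLATILE restriction above applies to both the ON join condition and each WHEN qual. An illustration (hypothetical co-located tables target/source), written as a comment:

	/*
	 * Rejected: random() is VOLATILE, so each worker could evaluate the ON
	 * clause differently and rows would match nondeterministically per shard.
	 *
	 *   MERGE INTO target t USING source s
	 *   ON (t.key = s.key AND random() > 0.5)
	 *   WHEN MATCHED THEN UPDATE SET val = s.val;
	 *
	 * Accepted: now() is STABLE, not VOLATILE, and is evaluated consistently
	 * within the statement.
	 *
	 *   ... ON (t.key = s.key AND s.updated_at < now())
	 */
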
@@ -29,6 +29,7 @@
 #include "distributed/citus_nodefuncs.h"
 #include "distributed/connection_management.h"
 #include "distributed/deparse_shard_query.h"
+#include "distributed/executor_util.h"
 #include "distributed/insert_select_planner.h"
 #include "distributed/insert_select_executor.h"
 #include "distributed/listutils.h"

@@ -199,20 +200,6 @@ CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es
 		return;
 	}
 
-	/*
-	 * ALTER TABLE statements are not explained by postgres. However ALTER TABLE statements
-	 * may trigger SELECT statements causing explain hook to run. This situation causes a crash in a worker.
-	 * Therefore we will detect if we are explaining a triggered query when we are processing
-	 * an ALTER TABLE statement and stop explain in this situation.
-	 */
-	if (AlterTableInProgress())
-	{
-		ExplainPropertyText("Citus Explain Scan",
-							"Explain for triggered constraint validation queries during ALTER TABLE commands are not supported by Citus",
-							es);
-		return;
-	}
-
 	ExplainOpenGroup("Distributed Query", "Distributed Query", true, es);
 
 	/*
@@ -1383,7 +1383,7 @@ DistPartitionKey(Oid relationId)
 	CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId);
 
 	/* non-distributed tables do not have partition column */
-	if (IsCitusTableTypeCacheEntry(partitionEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (!HasDistributionKeyCacheEntry(partitionEntry))
 	{
 		return NULL;
 	}

@@ -3385,6 +3385,13 @@ GetAggregateType(Aggref *aggregateExpression)
 {
 	Oid aggFunctionId = aggregateExpression->aggfnoid;
 
+	/* custom aggregates with combine func take precedence over name-based logic */
+	if (aggFunctionId >= FirstNormalObjectId &&
+		AggregateEnabledCustom(aggregateExpression))
+	{
+		return AGGREGATE_CUSTOM_COMBINE;
+	}
+
 	/* look up the function name */
 	char *aggregateProcName = get_func_name(aggFunctionId);
 	if (aggregateProcName == NULL)

@@ -3395,8 +3402,6 @@ GetAggregateType(Aggref *aggregateExpression)
 
 	uint32 aggregateCount = lengthof(AggregateNames);
 
-	Assert(AGGREGATE_INVALID_FIRST == 0);
-
 	for (uint32 aggregateIndex = 1; aggregateIndex < aggregateCount; aggregateIndex++)
 	{
 		const char *aggregateName = AggregateNames[aggregateIndex];

@@ -3465,7 +3470,7 @@ GetAggregateType(Aggref *aggregateExpression)
 		}
 	}
 
+	/* handle any remaining built-in aggregates with a suitable combinefn */
 	if (AggregateEnabledCustom(aggregateExpression))
 	{
 		return AGGREGATE_CUSTOM_COMBINE;
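The reordering above matters when a user-defined aggregate shares its name with a built-in. A sketch of the resulting classification order (the enum value and helper names come from the surrounding diff; the scenario is illustrative):

	/*
	 * Classification order in GetAggregateType after this change:
	 *
	 *   1. oid >= FirstNormalObjectId and a combine function exists
	 *        -> AGGREGATE_CUSTOM_COMBINE  (user-defined aggregates win)
	 *   2. name found in AggregateNames   -> corresponding built-in strategy
	 *   3. a combine function exists      -> AGGREGATE_CUSTOM_COMBINE
	 *
	 * So a custom aggregate that happens to be named like a built-in (say, a
	 * user-defined "sum" over a custom type) is no longer mis-handled by the
	 * name-based lookup in step 2.
	 */
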
@@ -228,7 +228,7 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
 	 * If the expression belongs to a non-distributed table continue searching for
 	 * other partition keys.
 	 */
-	if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
 	{
 		continue;
 	}

@@ -2199,7 +2199,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
 		Oid relationId = relationRestriction->relationId;
 
 		CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
-		if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+		if (!HasDistributionKeyCacheEntry(cacheEntry))
 		{
 			continue;
 		}

@@ -2377,7 +2377,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
 			nonReferenceRelations = lappend_oid(nonReferenceRelations,
 												relationId);
 		}
-		else if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+		else if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
 		{
 			/* do not need to handle non-distributed tables */
 			continue;

@@ -2482,7 +2482,7 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
 	ShardInterval *shardInterval = NULL;
 
 	CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
-	if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (!HasDistributionKeyCacheEntry(cacheEntry))
 	{
 		/* non-distributed tables have only one shard */
 		shardInterval = cacheEntry->sortedShardIntervalArray[0];

@@ -3697,7 +3697,7 @@ PartitionedOnColumn(Var *column, List *rangeTableList, List *dependentJobList)
 	Var *partitionColumn = PartitionColumn(relationId, rangeTableId);
 
 	/* non-distributed tables do not have partition columns */
-	if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
 	{
 		return false;
 	}

@@ -4573,7 +4573,8 @@ RowModifyLevelForQuery(Query *query)
 	}
 
 	if (commandType == CMD_UPDATE ||
-		commandType == CMD_DELETE)
+		commandType == CMD_DELETE ||
+		commandType == CMD_MERGE)
 	{
 		return ROW_MODIFY_NONCOMMUTATIVE;
 	}
@@ -5343,8 +5344,7 @@ ActiveShardPlacementLists(List *taskList)
 
 
 /*
- * CompareShardPlacements compares two shard placements by their tuple oid; this
- * oid reflects the tuple's insertion order into pg_dist_placement.
+ * CompareShardPlacements compares two shard placements by placement id.
  */
 int
 CompareShardPlacements(const void *leftElement, const void *rightElement)

@@ -5370,6 +5370,35 @@ CompareShardPlacements(const void *leftElement, const void *rightElement)
 }
 
 
+/*
+ * CompareGroupShardPlacements compares two group shard placements by placement id.
+ */
+int
+CompareGroupShardPlacements(const void *leftElement, const void *rightElement)
+{
+	const GroupShardPlacement *leftPlacement =
+		*((const GroupShardPlacement **) leftElement);
+	const GroupShardPlacement *rightPlacement =
+		*((const GroupShardPlacement **) rightElement);
+
+	uint64 leftPlacementId = leftPlacement->placementId;
+	uint64 rightPlacementId = rightPlacement->placementId;
+
+	if (leftPlacementId < rightPlacementId)
+	{
+		return -1;
+	}
+	else if (leftPlacementId > rightPlacementId)
+	{
+		return 1;
+	}
+	else
+	{
+		return 0;
+	}
+}
+
+
 /*
  * LeftRotateList returns a copy of the given list that has been cyclically
  * shifted to the left by the given rotation count. For this, the function
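Comparators of this shape are meant for qsort over arrays of GroupShardPlacement pointers, which is why they dereference a pointer-to-pointer. A minimal usage sketch (the array and its count are hypothetical):

	GroupShardPlacement *placementArray[16]; /* hypothetical array */
	int placementCount = 16;
	/* ... populate placementArray ... */
	qsort(placementArray, placementCount, sizeof(GroupShardPlacement *),
		  CompareGroupShardPlacements);
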
@@ -28,11 +28,13 @@
 #include "distributed/deparse_shard_query.h"
 #include "distributed/distribution_column.h"
 #include "distributed/errormessage.h"
+#include "distributed/executor_util.h"
 #include "distributed/log_utils.h"
 #include "distributed/insert_select_planner.h"
 #include "distributed/intermediate_result_pruning.h"
 #include "distributed/metadata_utility.h"
 #include "distributed/coordinator_protocol.h"
+#include "distributed/merge_planner.h"
 #include "distributed/metadata_cache.h"
 #include "distributed/multi_executor.h"
 #include "distributed/multi_join_order.h"

@@ -113,6 +115,7 @@ typedef struct WalkerState
 } WalkerState;
 
 bool EnableRouterExecution = true;
+bool EnableNonColocatedRouterQueryPushdown = false;
 
 
 /* planner functions forward declarations */
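The new flag is a GUC-backed global; its registration is not part of this hunk, but it presumably follows the usual PostgreSQL pattern. A representative registration sketch (the GUC name and description are assumptions, not confirmed by this diff):

	DefineCustomBoolVariable(
		"citus.enable_non_colocated_router_query_pushdown", /* assumed name */
		gettext_noop("Enables router planning for queries that reference "
					 "non-colocated distributed tables."),
		NULL,
		&EnableNonColocatedRouterQueryPushdown,
		false, /* matches the default in this diff */
		PGC_USERSET,
		0,
		NULL, NULL, NULL);
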
@@ -121,34 +124,24 @@ static void CreateSingleTaskRouterSelectPlan(DistributedPlan *distributedPlan,
 											 Query *query,
 											 PlannerRestrictionContext *
 											 plannerRestrictionContext);
-static Oid ResultRelationOidForQuery(Query *query);
 static bool IsTidColumn(Node *node);
 static DeferredErrorMessage * ModifyPartialQuerySupported(Query *queryTree, bool
 														  multiShardQuery,
 														  Oid *distributedTableId);
-static bool NodeIsFieldStore(Node *node);
-static DeferredErrorMessage * MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
+static DeferredErrorMessage * MultiShardUpdateDeleteSupported(Query *originalQuery,
 															  PlannerRestrictionContext
 															  *
 															  plannerRestrictionContext);
 static DeferredErrorMessage * SingleShardUpdateDeleteSupported(Query *originalQuery,
 															   PlannerRestrictionContext *
 															   plannerRestrictionContext);
-static bool HasDangerousJoinUsing(List *rtableList, Node *jtnode);
-static bool MasterIrreducibleExpression(Node *expression, bool *varArgument,
-										bool *badCoalesce);
 static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state);
 static bool MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context);
-static bool TargetEntryChangesValue(TargetEntry *targetEntry, Var *column,
-									FromExpr *joinTree);
 static Job * RouterInsertJob(Query *originalQuery);
 static void ErrorIfNoShardsExist(CitusTableCacheEntry *cacheEntry);
 static DeferredErrorMessage * DeferErrorIfModifyView(Query *queryTree);
 static Job * CreateJob(Query *query);
 static Task * CreateTask(TaskType taskType);
-static Job * RouterJob(Query *originalQuery,
-					   PlannerRestrictionContext *plannerRestrictionContext,
-					   DeferredErrorMessage **planningError);
 static bool RelationPrunesToMultipleShards(List *relationShardList);
 static void NormalizeMultiRowInsertTargetList(Query *query);
 static void AppendNextDummyColReference(Alias *expendedReferenceNames);

@@ -445,7 +438,7 @@ ModifyQueryResultRelationId(Query *query)
  * ResultRelationOidForQuery returns the OID of the relation that is modified
  * by a given query.
  */
-static Oid
+Oid
 ResultRelationOidForQuery(Query *query)
 {
 	RangeTblEntry *resultRTE = rt_fetch(query->resultRelation, query->rtable);
@@ -512,6 +505,161 @@ IsTidColumn(Node *node)
 }
 
 
+/*
+ * TargetlistAndFunctionsSupported implements a subset of what
+ * ModifyPartialQuerySupported checks, that subset being checking what functions
+ * are allowed and whether we are updating the distribution column, etc.
+ * Note: this subset of checks is repeated for each MERGE modify action.
+ */
+DeferredErrorMessage *
+TargetlistAndFunctionsSupported(Oid resultRelationId, FromExpr *joinTree, Node *quals,
+								List *targetList,
+								CmdType commandType, List *returningList)
+{
+	uint32 rangeTableId = 1;
+	Var *partitionColumn = NULL;
+
+	if (IsCitusTable(resultRelationId))
+	{
+		partitionColumn = PartitionColumn(resultRelationId, rangeTableId);
+	}
+
+	bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
+	bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
+	ListCell *targetEntryCell = NULL;
+
+	foreach(targetEntryCell, targetList)
+	{
+		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
+
+		/* skip resjunk entries: UPDATE adds some for ctid, etc. */
+		if (targetEntry->resjunk)
+		{
+			continue;
+		}
+
+		bool targetEntryPartitionColumn = false;
+		AttrNumber targetColumnAttrNumber = InvalidAttrNumber;
+
+		/* reference tables do not have partition column */
+		if (partitionColumn == NULL)
+		{
+			targetEntryPartitionColumn = false;
+		}
+		else
+		{
+			if (commandType == CMD_UPDATE)
+			{
+				/*
+				 * Note that it is not possible to give an alias to
+				 * UPDATE table SET ...
+				 */
+				if (targetEntry->resname)
+				{
+					targetColumnAttrNumber = get_attnum(resultRelationId,
+														targetEntry->resname);
+					if (targetColumnAttrNumber == partitionColumn->varattno)
+					{
+						targetEntryPartitionColumn = true;
+					}
+				}
+			}
+		}
+
+
+		if (commandType == CMD_UPDATE &&
+			FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
+										  CitusIsVolatileFunction))
+		{
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+								 "functions used in UPDATE queries on distributed "
+								 "tables must not be VOLATILE",
+								 NULL, NULL);
+		}
+
+		if (commandType == CMD_UPDATE && targetEntryPartitionColumn &&
+			TargetEntryChangesValue(targetEntry, partitionColumn,
+									joinTree))
+		{
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+								 "modifying the partition value of rows is not "
+								 "allowed",
+								 NULL, NULL);
+		}
+
+		if (commandType == CMD_UPDATE &&
+			MasterIrreducibleExpression((Node *) targetEntry->expr,
+										&hasVarArgument, &hasBadCoalesce))
+		{
+			Assert(hasVarArgument || hasBadCoalesce);
+		}
+
+		if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
+										  NodeIsFieldStore))
+		{
+			/* DELETE cannot do field indirection already */
+			Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+								 "inserting or modifying composite type fields is not "
+								 "supported", NULL,
+								 "Use the column name to insert or update the composite "
+								 "type as a single value");
+		}
+	}
+
+	if (joinTree != NULL)
+	{
+		if (FindNodeMatchingCheckFunction((Node *) quals,
+										  CitusIsVolatileFunction))
+		{
+			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
								 "functions used in the WHERE/ON/WHEN clause of modification "
+								 "queries on distributed tables must not be VOLATILE",
+								 NULL, NULL);
+		}
+		else if (MasterIrreducibleExpression(quals, &hasVarArgument,
+											 &hasBadCoalesce))
+		{
+			Assert(hasVarArgument || hasBadCoalesce);
+		}
+	}
+
+	if (hasVarArgument)
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "STABLE functions used in UPDATE queries "
+							 "cannot be called with column references",
+							 NULL, NULL);
+	}
+
+	if (hasBadCoalesce)
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "non-IMMUTABLE functions are not allowed in CASE or "
+							 "COALESCE statements",
+							 NULL, NULL);
+	}
+
+	if (contain_mutable_functions((Node *) returningList))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "non-IMMUTABLE functions are not allowed in the "
+							 "RETURNING clause",
+							 NULL, NULL);
+	}
+
+	if (quals != NULL &&
+		nodeTag(quals) == T_CurrentOfExpr)
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "cannot run DML queries with cursors", NULL,
+							 NULL);
+	}
+
+	return NULL;
+}
+
+
 /*
  * ModifyPartialQuerySupported implements a subset of what ModifyQuerySupported checks,
  * that subset being what's necessary to check modifying CTEs for.
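Because the checks are now shared, MERGE planning can invoke them once per WHEN action. A minimal sketch of such a per-action loop (hypothetical variable names; the real call site may differ):

	MergeAction *action = NULL;
	foreach_ptr(action, query->mergeActionList)
	{
		/* validate each WHEN clause's target list and qual with the shared checks */
		DeferredErrorMessage *error =
			TargetlistAndFunctionsSupported(resultRelationId,
											query->jointree,
											action->qual,
											action->targetList,
											action->commandType,
											NIL /* MERGE has no RETURNING (PG15) */);
		if (error != NULL)
		{
			return error;
		}
	}
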
@@ -620,148 +768,21 @@ ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery,
 
 	Oid resultRelationId = ModifyQueryResultRelationId(queryTree);
 	*distributedTableIdOutput = resultRelationId;
-	uint32 rangeTableId = 1;
-
-	Var *partitionColumn = NULL;
-	if (IsCitusTable(resultRelationId))
-	{
-		partitionColumn = PartitionColumn(resultRelationId, rangeTableId);
-	}
 	commandType = queryTree->commandType;
 	if (commandType == CMD_INSERT || commandType == CMD_UPDATE ||
 		commandType == CMD_DELETE)
 	{
-		bool hasVarArgument = false; /* A STABLE function is passed a Var argument */
-		bool hasBadCoalesce = false; /* CASE/COALESCE passed a mutable function */
-		FromExpr *joinTree = queryTree->jointree;
-		ListCell *targetEntryCell = NULL;
-
-		foreach(targetEntryCell, queryTree->targetList)
-		{
-			TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
-
-			/* skip resjunk entries: UPDATE adds some for ctid, etc. */
-			if (targetEntry->resjunk)
-			{
-				continue;
-			}
-
-			bool targetEntryPartitionColumn = false;
-			AttrNumber targetColumnAttrNumber = InvalidAttrNumber;
-
-			/* reference tables do not have partition column */
-			if (partitionColumn == NULL)
-			{
-				targetEntryPartitionColumn = false;
-			}
-			else
-			{
-				if (commandType == CMD_UPDATE)
-				{
-					/*
-					 * Note that it is not possible to give an alias to
-					 * UPDATE table SET ...
-					 */
-					if (targetEntry->resname)
-					{
-						targetColumnAttrNumber = get_attnum(resultRelationId,
-															targetEntry->resname);
-						if (targetColumnAttrNumber == partitionColumn->varattno)
-						{
-							targetEntryPartitionColumn = true;
-						}
-					}
-				}
-			}
-
-
-			if (commandType == CMD_UPDATE &&
-				FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
-											  CitusIsVolatileFunction))
-			{
-				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-									 "functions used in UPDATE queries on distributed "
-									 "tables must not be VOLATILE",
-									 NULL, NULL);
-			}
-
-			if (commandType == CMD_UPDATE && targetEntryPartitionColumn &&
-				TargetEntryChangesValue(targetEntry, partitionColumn,
-										queryTree->jointree))
-			{
-				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-									 "modifying the partition value of rows is not "
-									 "allowed",
-									 NULL, NULL);
-			}
-
-			if (commandType == CMD_UPDATE &&
-				MasterIrreducibleExpression((Node *) targetEntry->expr,
-											&hasVarArgument, &hasBadCoalesce))
-			{
-				Assert(hasVarArgument || hasBadCoalesce);
-			}
-
-			if (FindNodeMatchingCheckFunction((Node *) targetEntry->expr,
-											  NodeIsFieldStore))
-			{
-				/* DELETE cannot do field indirection already */
-				Assert(commandType == CMD_UPDATE || commandType == CMD_INSERT);
-				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-									 "inserting or modifying composite type fields is not "
-									 "supported", NULL,
-									 "Use the column name to insert or update the composite "
-									 "type as a single value");
-			}
-		}
-
-		if (joinTree != NULL)
-		{
-			if (FindNodeMatchingCheckFunction((Node *) joinTree->quals,
-											  CitusIsVolatileFunction))
-			{
-				return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-									 "functions used in the WHERE clause of modification "
-									 "queries on distributed tables must not be VOLATILE",
-									 NULL, NULL);
-			}
-			else if (MasterIrreducibleExpression(joinTree->quals, &hasVarArgument,
-												 &hasBadCoalesce))
-			{
-				Assert(hasVarArgument || hasBadCoalesce);
-			}
-		}
-
-		if (hasVarArgument)
-		{
-			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-								 "STABLE functions used in UPDATE queries "
-								 "cannot be called with column references",
-								 NULL, NULL);
-		}
-
-		if (hasBadCoalesce)
-		{
-			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-								 "non-IMMUTABLE functions are not allowed in CASE or "
-								 "COALESCE statements",
-								 NULL, NULL);
-		}
-
-		if (contain_mutable_functions((Node *) queryTree->returningList))
-		{
-			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-								 "non-IMMUTABLE functions are not allowed in the "
-								 "RETURNING clause",
-								 NULL, NULL);
-		}
-
-		if (queryTree->jointree->quals != NULL &&
-			nodeTag(queryTree->jointree->quals) == T_CurrentOfExpr)
-		{
-			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-								 "cannot run DML queries with cursors", NULL,
-								 NULL);
-		}
+		deferredError =
+			TargetlistAndFunctionsSupported(resultRelationId,
+											queryTree->jointree,
+											queryTree->jointree->quals,
+											queryTree->targetList,
+											commandType,
+											queryTree->returningList);
+		if (deferredError)
+		{
+			return deferredError;
+		}
 	}
 }
@@ -866,7 +887,7 @@ IsLocallyAccessibleCitusLocalTable(Oid relationId)
 /*
  * NodeIsFieldStore returns true if the given Node is a FieldStore object.
  */
-static bool
+bool
 NodeIsFieldStore(Node *node)
 {
 	return node && IsA(node, FieldStore);

@@ -888,7 +909,9 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
 					 PlannerRestrictionContext *plannerRestrictionContext)
 {
 	Oid distributedTableId = InvalidOid;
-	DeferredErrorMessage *error = ModifyPartialQuerySupported(queryTree, multiShardQuery,
+	DeferredErrorMessage *error =
+		ModifyPartialQuerySupported(queryTree, multiShardQuery,
 									&distributedTableId);
 	if (error)
 	{

@@ -953,19 +976,12 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
 		 */
 	}
 	else if (rangeTableEntry->relkind == RELKIND_MATVIEW)
 	{
-		if (IsMergeAllowedOnRelation(originalQuery, rangeTableEntry))
-		{
-			continue;
-		}
-		else
-		{
 		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
							 "materialized views in "
							 "modify queries are not supported",
							 NULL, NULL);
-		}
 	}
 	/* for other kinds of relations, check if it's distributed */
 	else
 	{

@@ -1065,7 +1081,7 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
 
 	if (multiShardQuery)
 	{
-		errorMessage = MultiShardUpdateDeleteMergeSupported(
+		errorMessage = MultiShardUpdateDeleteSupported(
 			originalQuery,
 			plannerRestrictionContext);
 	}

@@ -1246,11 +1262,11 @@ ErrorIfOnConflictNotSupported(Query *queryTree)
 
 
 /*
- * MultiShardUpdateDeleteMergeSupported returns the error message if the update/delete is
+ * MultiShardUpdateDeleteSupported returns the error message if the update/delete is
  * not pushdownable, otherwise it returns NULL.
  */
 static DeferredErrorMessage *
-MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
+MultiShardUpdateDeleteSupported(Query *originalQuery,
 								PlannerRestrictionContext *plannerRestrictionContext)
 {
 	DeferredErrorMessage *errorMessage = NULL;

@@ -1282,7 +1298,8 @@ MultiShardUpdateDeleteMergeSupported(Query *originalQuery,
 	}
 	else
 	{
-		errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(originalQuery,
+		errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(
+			originalQuery,
 			plannerRestrictionContext);
 	}
 

@@ -1323,7 +1340,7 @@ SingleShardUpdateDeleteSupported(Query *originalQuery,
 * HasDangerousJoinUsing searches the jointree for unnamed JOIN USING. Check the
 * implementation of has_dangerous_join_using in ruleutils.
 */
-static bool
+bool
 HasDangerousJoinUsing(List *rtableList, Node *joinTreeNode)
 {
 	if (IsA(joinTreeNode, RangeTblRef))

@@ -1427,7 +1444,7 @@ IsMergeQuery(Query *query)
 * which do, but for now we just error out. That makes both the code and user-education
 * easier.
 */
-static bool
+bool
 MasterIrreducibleExpression(Node *expression, bool *varArgument, bool *badCoalesce)
 {
 	WalkerState data;

@@ -1575,7 +1592,7 @@ MasterIrreducibleExpressionFunctionChecker(Oid func_id, void *context)
 * expression is a value that is implied by the qualifiers of the join
 * tree, or the target entry sets a different column.
 */
-static bool
+bool
 TargetEntryChangesValue(TargetEntry *targetEntry, Var *column, FromExpr *joinTree)
 {
 	bool isColumnValueChanged = true;

@@ -1796,7 +1813,7 @@ ExtractFirstCitusTableId(Query *query)
 * RouterJob builds a Job to represent a single shard select/update/delete and
 * multiple shard update/delete queries.
 */
-static Job *
+Job *
 RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionContext,
 		  DeferredErrorMessage **planningError)
 {

@@ -1846,8 +1863,8 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon
 	if (*planningError)
 	{
 		/*
-		 * For MERGE, we do _not_ plan anything other than Router job, let's
-		 * not continue further down the lane in distributed planning, simply
+		 * For MERGE, we do _not_ plan any other router job than the MERGE job itself,
+		 * let's not continue further down the lane in distributed planning, simply
 		 * bail out.
 		 */
 		if (IsMergeQuery(originalQuery))
@@ -2320,9 +2337,20 @@ PlanRouterQuery(Query *originalQuery,
 	}
 
 	Assert(UpdateOrDeleteOrMergeQuery(originalQuery));
 
+	if (IsMergeQuery(originalQuery))
+	{
+		planningError = MergeQuerySupported(originalQuery,
+											isMultiShardQuery,
+											plannerRestrictionContext);
+	}
+	else
+	{
 		planningError = ModifyQuerySupported(originalQuery, originalQuery,
 											 isMultiShardQuery,
 											 plannerRestrictionContext);
+	}
+
 	if (planningError != NULL)
 	{
 		return planningError;

@@ -2643,7 +2671,7 @@ TargetShardIntervalForFastPathQuery(Query *query, bool *isMultiShardQuery,
 {
 	Oid relationId = ExtractFirstCitusTableId(query);
 
-	if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (!HasDistributionKey(relationId))
 	{
 		/* we don't need to do shard pruning for non-distributed tables */
 		return list_make1(LoadShardIntervalList(relationId));

@@ -2936,7 +2964,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
 	Assert(query->commandType == CMD_INSERT);
 
 	/* reference tables and citus local tables can only have one shard */
-	if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (!HasDistributionKeyCacheEntry(cacheEntry))
 	{
 		List *shardIntervalList = LoadShardIntervalList(distributedTableId);
 

@@ -3477,7 +3505,7 @@ ExtractInsertPartitionKeyValue(Query *query)
 	uint32 rangeTableId = 1;
 	Const *singlePartitionValueConst = NULL;
 
-	if (IsCitusTableType(distributedTableId, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (!HasDistributionKey(distributedTableId))
 	{
 		return NULL;
 	}

@@ -3589,6 +3617,8 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
 	bool hasDistributedTable = false;
 	bool hasReferenceTable = false;
 
+	List *distributedRelationList = NIL;
+
 	ExtractRangeTableRelationWalker((Node *) query, &rangeTableRelationList);
 	foreach(rangeTableRelationCell, rangeTableRelationList)
 	{

@@ -3626,6 +3656,8 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
 		if (IsCitusTableType(distributedTableId, DISTRIBUTED_TABLE))
 		{
 			hasDistributedTable = true;
+			distributedRelationList = lappend_oid(distributedRelationList,
+												  distributedTableId);
 		}
 
 		/*

@@ -3680,6 +3712,15 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
 							 NULL, NULL);
 	}
 
+	if (!EnableNonColocatedRouterQueryPushdown &&
+		!AllDistributedRelationsInListColocated(distributedRelationList))
+	{
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 "router planner does not support queries that "
+							 "reference non-colocated distributed tables",
+							 NULL, NULL);
+	}
+
 #if PG_VERSION_NUM >= PG_VERSION_14
 	DeferredErrorMessage *CTEWithSearchClauseError =
 		ErrorIfQueryHasCTEWithSearchClause(query);
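In practice the new check defers router planning for queries that touch non-colocated distributed tables unless the flag is enabled. An illustration as a comment (tables a and b are hypothetical, both hash-distributed on id but in different co-location groups):

	/*
	 * With the non-colocated router pushdown flag off (the default in this
	 * diff), the following query is deferred even though each filter hits a
	 * single shard key, because a and b are not co-located:
	 *
	 *   SELECT * FROM a, b WHERE a.id = 1 AND b.id = 1;
	 */
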
@@ -3797,8 +3838,7 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree)
 		CitusTableCacheEntry *modificationTableCacheEntry =
 			GetCitusTableCacheEntry(distributedTableId);
 
-		if (IsCitusTableTypeCacheEntry(modificationTableCacheEntry,
-									   CITUS_TABLE_WITH_NO_DIST_KEY))
+		if (!HasDistributionKeyCacheEntry(modificationTableCacheEntry))
 		{
 			return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
 								 "cannot router plan modification of a non-distributed table",

@@ -168,7 +168,7 @@ AnchorRte(Query *subquery)
 	{
 		Oid relationId = currentRte->relid;
 
-		if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+		if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
 		{
 			/*
 			 * Non-distributed tables should not be the anchor rte since they

@@ -591,10 +591,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
 	}
 	else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext))
 	{
-		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
-							 "complex joins are only supported when all distributed tables are "
-							 "co-located and joined on their distribution columns",
-							 NULL, NULL);
+		StringInfo errorMessage = makeStringInfo();
+		bool isMergeCmd = IsMergeQuery(originalQuery);
+		appendStringInfo(errorMessage,
+						 "%s"
+						 "only supported when all distributed tables are "
+						 "co-located and joined on their distribution columns",
+						 isMergeCmd ? "MERGE command is " : "complex joins are ");
+
+		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
+							 errorMessage->data, NULL, NULL);
 	}
 
 	/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
@@ -151,7 +151,8 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass
 													  secondClass);
 static Var * PartitionKeyForRTEIdentityInQuery(Query *query, int targetRTEIndex,
 											   Index *partitionKeyIndex);
-static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext *
+static bool AllDistributedRelationsInRestrictionContextColocated(
+	RelationRestrictionContext *
 													  restrictionContext);
 static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node);
 static JoinRestrictionContext * FilterJoinRestrictionContext(

@@ -383,7 +384,7 @@ SafeToPushdownUnionSubquery(Query *originalQuery,
 		return false;
 	}
 
-	if (!AllRelationsInRestrictionContextColocated(restrictionContext))
+	if (!AllDistributedRelationsInRestrictionContextColocated(restrictionContext))
 	{
 		/* distribution columns are equal, but tables are not co-located */
 		return false;

@@ -703,8 +704,8 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
 		int rteIdentity = GetRTEIdentity(relationRestriction->rte);
 
 		/* we shouldn't check for the equality of non-distributed tables */
-		if (IsCitusTableType(relationRestriction->relationId,
-							 CITUS_TABLE_WITH_NO_DIST_KEY))
+		if (IsCitusTable(relationRestriction->relationId) &&
+			!HasDistributionKey(relationRestriction->relationId))
 		{
 			continue;
 		}

@@ -1919,22 +1920,66 @@ FindQueryContainingRTEIdentityInternal(Node *node,
 
 
 /*
- * AllRelationsInRestrictionContextColocated determines whether all of the relations in the
- * given relation restrictions list are co-located.
+ * AllDistributedRelationsInRestrictionContextColocated determines whether all of the
+ * distributed relations in the given relation restrictions list are co-located.
  */
 static bool
-AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext)
+AllDistributedRelationsInRestrictionContextColocated(
+	RelationRestrictionContext *restrictionContext)
 {
 	RelationRestriction *relationRestriction = NULL;
-	int initialColocationId = INVALID_COLOCATION_ID;
+	List *relationIdList = NIL;
 
 	/* check whether all relations exist in the main restriction list */
 	foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList)
 	{
-		Oid relationId = relationRestriction->relationId;
-
-		if (IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+		relationIdList = lappend_oid(relationIdList, relationRestriction->relationId);
+	}
+
+	return AllDistributedRelationsInListColocated(relationIdList);
+}
+
+
+/*
+ * AllDistributedRelationsInRTEListColocated determines whether all of the
+ * distributed relations in the given RangeTableEntry list are co-located.
+ */
+bool
+AllDistributedRelationsInRTEListColocated(List *rangeTableEntryList)
+{
+	RangeTblEntry *rangeTableEntry = NULL;
+	List *relationIdList = NIL;
+
+	foreach_ptr(rangeTableEntry, rangeTableEntryList)
 	{
+		relationIdList = lappend_oid(relationIdList, rangeTableEntry->relid);
+	}
+
+	return AllDistributedRelationsInListColocated(relationIdList);
+}
+
+
+/*
+ * AllDistributedRelationsInListColocated determines whether all of the
+ * distributed relations in the given list are co-located.
+ */
+bool
+AllDistributedRelationsInListColocated(List *relationList)
+{
+	int initialColocationId = INVALID_COLOCATION_ID;
+	Oid relationId = InvalidOid;
+
+	foreach_oid(relationId, relationList)
+	{
+		if (!IsCitusTable(relationId))
+		{
+			/* not interested in Postgres tables */
+			continue;
+		}
+
+		if (!IsCitusTableType(relationId, DISTRIBUTED_TABLE))
+		{
+			/* not interested in non-distributed tables */
 			continue;
 		}
 
@ -333,7 +333,7 @@ PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList,
     }
 
     /* short circuit for non-distributed tables such as reference table */
-    if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+    if (!HasDistributionKeyCacheEntry(cacheEntry))
     {
         prunedList = ShardArrayToList(cacheEntry->sortedShardIntervalArray,
                                       cacheEntry->shardIntervalArrayLength);
@ -88,6 +88,8 @@ static const char *replicationSlotPrefix[] = {
  * IMPORTANT: All the subscription names should start with "citus_". Otherwise
  * our utility hook does not defend against non-superusers altering or dropping
  * them, which is important for security purposes.
+ *
+ * We should also keep these in sync with IsCitusShardTransferBackend().
  */
 static const char *subscriptionPrefix[] = {
     [SHARD_MOVE] = "citus_shard_move_subscription_",

@ -1338,7 +1340,9 @@ CreatePublications(MultiConnection *connection,
                                          worker->groupId,
                                          CLEANUP_ALWAYS);
 
+    ExecuteCriticalRemoteCommand(connection, DISABLE_DDL_PROPAGATION);
     ExecuteCriticalRemoteCommand(connection, createPublicationCommand->data);
+    ExecuteCriticalRemoteCommand(connection, ENABLE_DDL_PROPAGATION);
     pfree(createPublicationCommand->data);
     pfree(createPublicationCommand);
 }
@ -10,18 +10,27 @@
 #include "postgres.h"
 #include "distributed/shardinterval_utils.h"
 #include "distributed/shardsplit_shared_memory.h"
+#include "distributed/worker_shard_visibility.h"
+#include "distributed/worker_protocol.h"
 #include "distributed/listutils.h"
+#include "distributed/metadata/distobject.h"
 #include "replication/logical.h"
 #include "utils/typcache.h"
+#include "utils/lsyscache.h"
+#include "catalog/pg_namespace.h"
 
 extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
-static LogicalDecodeChangeCB pgoutputChangeCB;
+static LogicalDecodeChangeCB pgOutputPluginChangeCB;
 
+#define InvalidRepOriginId 0
+
 static HTAB *SourceToDestinationShardMap = NULL;
+static bool replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId
+                                         origin_id);
 
 /* Plugin callback */
-static void split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+static void shard_split_change_cb(LogicalDecodingContext *ctx,
+                                  ReorderBufferTXN *txn,
                             Relation relation, ReorderBufferChange *change);
 
 /* Helper methods */

@ -47,7 +56,8 @@ void
 _PG_output_plugin_init(OutputPluginCallbacks *cb)
 {
     LogicalOutputPluginInit plugin_init =
-        (LogicalOutputPluginInit) (void *) load_external_function("pgoutput",
+        (LogicalOutputPluginInit) (void *)
+        load_external_function("pgoutput",
                                "_PG_output_plugin_init",
                                false, NULL);
 
@ -60,25 +70,56 @@ _PG_output_plugin_init(OutputPluginCallbacks *cb)
     plugin_init(cb);
 
     /* actual pgoutput callback will be called with the appropriate destination shard */
-    pgoutputChangeCB = cb->change_cb;
-    cb->change_cb = split_change_cb;
+    pgOutputPluginChangeCB = cb->change_cb;
+    cb->change_cb = shard_split_change_cb;
+    cb->filter_by_origin_cb = replication_origin_filter_cb;
 }
 
 
 /*
- * split_change function emits the incoming tuple change
+ * replication_origin_filter_cb call back function filters out publication of changes
+ * originated from any node other than the current node. This is
+ * identified by the "origin_id" of the changes. The origin_id is set to
+ * a non-zero value in the origin node as part of WAL replication for internal
+ * operations like shard split/moves/create_distributed_table etc.
+ */
+static bool
+replication_origin_filter_cb(LogicalDecodingContext *ctx, RepOriginId origin_id)
+{
+    return (origin_id != InvalidRepOriginId);
+}
+
+
+/*
+ * shard_split_change_cb function emits the incoming tuple change
  * to the appropriate destination shard.
  */
 static void
-split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+shard_split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
                 Relation relation, ReorderBufferChange *change)
 {
+    /*
+     * If Citus has not been loaded yet, pass the changes
+     * through to the underlying decoder plugin.
+     */
+    if (!CitusHasBeenLoaded())
+    {
+        pgOutputPluginChangeCB(ctx, txn, relation, change);
+        return;
+    }
+
+    /* check if the relation is publishable.*/
     if (!is_publishable_relation(relation))
     {
         return;
     }
 
     char *replicationSlotName = ctx->slot->data.name.data;
+    if (replicationSlotName == NULL)
+    {
+        elog(ERROR, "Replication slot name is NULL!");
+        return;
+    }
 
     /*
      * Initialize SourceToDestinationShardMap if not already initialized.

@ -198,7 +239,7 @@ split_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
         }
     }
 
-    pgoutputChangeCB(ctx, txn, targetRelation, change);
+    pgOutputPluginChangeCB(ctx, txn, targetRelation, change);
     RelationClose(targetRelation);
 }
@ -74,6 +74,7 @@
 #include "distributed/recursive_planning.h"
 #include "distributed/reference_table_utils.h"
 #include "distributed/relation_access_tracking.h"
+#include "distributed/replication_origin_session_utils.h"
 #include "distributed/run_from_same_connection.h"
 #include "distributed/shard_cleaner.h"
 #include "distributed/shard_transfer.h"

@ -135,6 +136,8 @@ ReadColumnarOptions_type extern_ReadColumnarOptions = NULL;
     CppConcat(extern_, funcname) = \
         (typename) (void *) lookup_external_function(handle, # funcname)
 
+#define CDC_DECODER_DYNAMIC_LIB_PATH "$libdir/citus_decoders:$libdir"
+
 DEFINE_COLUMNAR_PASSTHROUGH_FUNC(columnar_handler)
 DEFINE_COLUMNAR_PASSTHROUGH_FUNC(alter_columnar_table_set)
 DEFINE_COLUMNAR_PASSTHROUGH_FUNC(alter_columnar_table_reset)

@ -206,7 +209,7 @@ static bool StatisticsCollectionGucCheckHook(bool *newval, void **extra, GucSour
                                              source);
 static void CitusAuthHook(Port *port, int status);
 static bool IsSuperuser(char *userName);
+static void AdjustDynamicLibraryPathForCdcDecoders(void);
 
 static ClientAuthentication_hook_type original_client_auth_hook = NULL;

@ -359,6 +362,11 @@ static const struct config_enum_entry cpu_priority_options[] = {
     { NULL, 0, false}
 };
 
+static const struct config_enum_entry metadata_sync_mode_options[] = {
+    { "transactional", METADATA_SYNC_TRANSACTIONAL, false },
+    { "nontransactional", METADATA_SYNC_NON_TRANSACTIONAL, false },
+    { NULL, 0, false }
+};
+
 /* *INDENT-ON* */

@ -469,6 +477,17 @@ _PG_init(void)
     InitializeLocallyReservedSharedConnections();
     InitializeClusterClockMem();
 
+    /*
+     * Adjust the Dynamic Library Path to prepend citus_decoders to the dynamic
+     * library path. This is needed to make sure that the citus decoders are
+     * loaded before the default decoders for CDC.
+     */
+    if (EnableChangeDataCapture)
+    {
+        AdjustDynamicLibraryPathForCdcDecoders();
+    }
+
     /* initialize shard split shared memory handle management */
     InitializeShardSplitSMHandleManagement();

@ -536,6 +555,22 @@ _PG_init(void)
 }
 
 
+/*
+ * AdjustDynamicLibraryPathForCdcDecoders prepends $libdir/citus_decoders
+ * to the dynamic library path. This is needed to make sure that the citus
+ * decoders are loaded before the default decoders for CDC.
+ */
+static void
+AdjustDynamicLibraryPathForCdcDecoders(void)
+{
+    if (strcmp(Dynamic_library_path, "$libdir") == 0)
+    {
+        SetConfigOption("dynamic_library_path", CDC_DECODER_DYNAMIC_LIB_PATH,
+                        PGC_POSTMASTER, PGC_S_OVERRIDE);
+    }
+}
+
+
 #if PG_VERSION_NUM >= PG_VERSION_15
 
 /*

@ -1132,6 +1167,16 @@ RegisterCitusConfigVariables(void)
         GUC_STANDARD,
         NULL, NULL, NULL);
 
+    DefineCustomBoolVariable(
+        "citus.enable_change_data_capture",
+        gettext_noop("Enables using replication origin tracking for change data capture"),
+        NULL,
+        &EnableChangeDataCapture,
+        false,
+        PGC_USERSET,
+        GUC_STANDARD,
+        NULL, NULL, NULL);
+
     DefineCustomBoolVariable(
         "citus.enable_cluster_clock",
         gettext_noop("When users explicitly call UDF citus_get_transaction_clock() "

@ -1268,6 +1313,26 @@ RegisterCitusConfigVariables(void)
         GUC_NO_SHOW_ALL,
         NULL, NULL, NULL);
 
+    DefineCustomBoolVariable(
+        "citus.enable_non_colocated_router_query_pushdown",
+        gettext_noop("Enables router planner for the queries that reference "
+                     "non-colocated distributed tables."),
+        gettext_noop("Normally, router planner is only enabled for "
+                     "the queries that reference colocated distributed tables "
+                     "because it is not guaranteed to have the target shards "
+                     "always on the same node, e.g., after rebalancing the "
+                     "shards. For this reason, while enabling this flag allows "
+                     "some degree of optimization for the queries that reference "
+                     "non-colocated distributed tables, it is not guaranteed "
+                     "that the same query will work after rebalancing the shards "
+                     "or altering the shard count of one of those distributed "
+                     "tables."),
+        &EnableNonColocatedRouterQueryPushdown,
+        true,
+        PGC_USERSET,
+        GUC_NO_SHOW_ALL,
+        NULL, NULL, NULL);
+
     DefineCustomBoolVariable(
         "citus.enable_repartition_joins",
         gettext_noop("Allows Citus to repartition data between nodes."),

@ -1849,6 +1914,21 @@ RegisterCitusConfigVariables(void)
         GUC_UNIT_MS | GUC_NO_SHOW_ALL,
         NULL, NULL, NULL);
 
+    DefineCustomEnumVariable(
+        "citus.metadata_sync_mode",
+        gettext_noop("Sets transaction mode for metadata syncs."),
+        gettext_noop("metadata sync can be run inside a single coordinated "
+                     "transaction or with multiple small transactions in "
+                     "idempotent way. By default we sync metadata in single "
+                     "coordinated transaction. When we hit memory problems "
+                     "at workers, we have alternative nontransactional mode "
+                     "where we send each command with separate transaction."),
+        &MetadataSyncTransMode,
+        METADATA_SYNC_TRANSACTIONAL, metadata_sync_mode_options,
+        PGC_SUSET,
+        GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
+        NULL, NULL, NULL);
+
     DefineCustomIntVariable(
         "citus.metadata_sync_retry_interval",
         gettext_noop("Sets the interval to retry failed metadata syncs."),

@ -2406,7 +2486,6 @@ RegisterCitusConfigVariables(void)
         GUC_STANDARD,
         NULL, NULL, NULL);
 
-
     /* warn about config items in the citus namespace that are not registered above */
     EmitWarningsOnPlaceholders("citus");
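
The three GUCs introduced above are plain runtime settings. A minimal sketch of exercising them from psql (session and values are illustrative, not part of the commit; citus.metadata_sync_mode is superuser-only and hidden from SHOW ALL):

    -- CDC is off by default; replication origin tracking only engages when it is on
    SET citus.enable_change_data_capture TO on;
    -- fall back to the memory-friendly, idempotent sync mode (superuser required)
    SET citus.metadata_sync_mode TO 'nontransactional';
    -- router planning for non-colocated distributed tables can be disabled per session
    SET citus.enable_non_colocated_router_query_pushdown TO off;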
@ -1,4 +1,12 @@
 -- citus--11.2-1--11.3-1
+#include "udfs/repl_origin_helper/11.3-1.sql"
+#include "udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql"
+ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY USING INDEX pg_dist_authinfo_identification_index;
+ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY USING INDEX pg_dist_partition_logical_relid_index;
+ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY USING INDEX pg_dist_placement_placementid_index;
+ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY USING INDEX pg_dist_rebalance_strategy_name_key;
+ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY USING INDEX pg_dist_shard_shardid_index;
+ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY USING INDEX pg_dist_transaction_unique_constraint;
 
--- bump version to 11.3-1
+#include "udfs/worker_drop_all_shell_tables/11.3-1.sql"
+#include "udfs/citus_internal_mark_node_not_synced/11.3-1.sql"
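
The REPLICA IDENTITY USING INDEX statements above are what make row images of these catalog tables decodable by logical replication, which the CDC work depends on. A quick sanity check after upgrading (a sketch; relreplident = 'i' means index-based identity):

    SELECT relname, relreplident
    FROM pg_class
    WHERE relname IN ('pg_dist_authinfo', 'pg_dist_partition', 'pg_dist_placement',
                      'pg_dist_rebalance_strategy', 'pg_dist_shard', 'pg_dist_transaction');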
@ -1,2 +1,22 @@
 -- citus--11.3-1--11.2-1
--- this is an empty downgrade path since citus--11.2-1--11.3-1.sql is empty for now
+
+DROP FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking();
+DROP FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking();
+DROP FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active();
+DROP FUNCTION IF EXISTS pg_catalog.worker_adjust_identity_column_seq_ranges(regclass);
+ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING;
+
+ALTER TABLE pg_catalog.pg_dist_authinfo REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_partition REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING;
+ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING;
+
+DROP PROCEDURE pg_catalog.worker_drop_all_shell_tables(bool);
+DROP FUNCTION pg_catalog.citus_internal_mark_node_not_synced(int, int);
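
The downgrade mirrors the upgrade step by step: the new functions and procedure are dropped and every catalog table reverts to REPLICA IDENTITY NOTHING. A hedged sketch of applying and verifying it:

    ALTER EXTENSION citus UPDATE TO '11.2-1';
    SELECT extversion FROM pg_extension WHERE extname = 'citus';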
src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/11.3-1.sql (generated, new file, 6 lines)

@ -0,0 +1,6 @@
+CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_mark_node_not_synced(parent_pid int, nodeid int)
+    RETURNS VOID
+    LANGUAGE C STRICT
+    AS 'MODULE_PATHNAME', $$citus_internal_mark_node_not_synced$$;
+COMMENT ON FUNCTION citus_internal_mark_node_not_synced(int, int)
+    IS 'marks given node not synced by unsetting metadatasynced column at the start of the nontransactional sync.';
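
The metadatasynced flag this UDF unsets lives in the pg_dist_node catalog, so the effect of a nontransactional sync in progress can be observed directly. A sketch:

    SELECT nodeid, nodename, metadatasynced FROM pg_dist_node;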
@ -0,0 +1,20 @@
+CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking()
+RETURNS void
+LANGUAGE C STRICT
+AS 'MODULE_PATHNAME', $$citus_internal_start_replication_origin_tracking$$;
+COMMENT ON FUNCTION pg_catalog.citus_internal_start_replication_origin_tracking()
+    IS 'To start replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC';
+
+CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking()
+RETURNS void
+LANGUAGE C STRICT
+AS 'MODULE_PATHNAME', $$citus_internal_stop_replication_origin_tracking$$;
+COMMENT ON FUNCTION pg_catalog.citus_internal_stop_replication_origin_tracking()
+    IS 'To stop replication origin tracking for skipping publishing of duplicated events during internal data movements for CDC';
+
+CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active()
+RETURNS boolean
+LANGUAGE C STRICT
+AS 'MODULE_PATHNAME', $$citus_internal_is_replication_origin_tracking_active$$;
+COMMENT ON FUNCTION pg_catalog.citus_internal_is_replication_origin_tracking_active()
+    IS 'To check if replication origin tracking is active for skipping publishing of duplicated events during internal data movements for CDC';
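
Together these three UDFs bracket an internal data movement: tracking is started before writes are replayed, probed when reusing a connection, and stopped afterwards. A minimal local sketch (the start call is a no-op unless citus.enable_change_data_capture is on):

    SELECT pg_catalog.citus_internal_start_replication_origin_tracking();
    SELECT pg_catalog.citus_internal_is_replication_origin_tracking_active();  -- expect t
    SELECT pg_catalog.citus_internal_stop_replication_origin_tracking();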
src/backend/distributed/sql/udfs/worker_adjust_identity_column_seq_ranges/11.3-1.sql (generated, new file, 7 lines)

@ -0,0 +1,7 @@
+CREATE OR REPLACE FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass)
+    RETURNS VOID
+    LANGUAGE C STRICT
+    AS 'MODULE_PATHNAME', $$worker_adjust_identity_column_seq_ranges$$;
+COMMENT ON FUNCTION pg_catalog.worker_adjust_identity_column_seq_ranges(regclass)
+    IS 'modify identity column seq ranges to produce globally unique values';
+
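
A sketch of invoking the new UDF on a worker node; the table name here is hypothetical:

    SELECT pg_catalog.worker_adjust_identity_column_seq_ranges('my_dist_table'::regclass);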
@ -0,0 +1,23 @@
+-- During metadata sync, when we send many ddls over single transaction, worker node can error due
+-- to reaching at max allocation block size for invalidation messages. To find a workaround for the problem,
+-- we added nontransactional metadata sync mode where we create many transaction while dropping shell tables
+-- via https://github.com/citusdata/citus/pull/6728.
+CREATE OR REPLACE PROCEDURE pg_catalog.worker_drop_all_shell_tables(singleTransaction bool DEFAULT true)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+    table_name text;
+BEGIN
+    -- drop shell tables within single or multiple transactions according to the flag singleTransaction
+    FOR table_name IN SELECT logicalrelid::regclass::text FROM pg_dist_partition
+    LOOP
+        PERFORM pg_catalog.worker_drop_shell_table(table_name);
+        IF not singleTransaction THEN
+            COMMIT;
+        END IF;
+    END LOOP;
+END;
+$$;
+COMMENT ON PROCEDURE worker_drop_all_shell_tables(singleTransaction bool)
+    IS 'drop all distributed tables only without the metadata within single transaction or '
+       'multiple transaction specified by the flag singleTransaction';
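
Because the procedure may COMMIT between drops, the multi-transaction form has to run outside an explicit transaction block. A sketch of both modes:

    CALL pg_catalog.worker_drop_all_shell_tables();       -- single transaction (default)
    CALL pg_catalog.worker_drop_all_shell_tables(false);  -- commit after each shell table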
@ -49,26 +49,23 @@ activate_node_snapshot(PG_FUNCTION_ARGS)
      */
     WorkerNode *dummyWorkerNode = GetFirstPrimaryWorkerNode();
 
-    List *updateLocalGroupCommand =
-        list_make1(LocalGroupIdUpdateCommand(dummyWorkerNode->groupId));
-    List *syncDistObjCommands = SyncDistributedObjectsCommandList(dummyWorkerNode);
-    List *dropSnapshotCommands = NodeMetadataDropCommands();
-    List *createSnapshotCommands = NodeMetadataCreateCommands();
-    List *pgDistTableMetadataSyncCommands = PgDistTableMetadataSyncCommandList();
+    /*
+     * Create MetadataSyncContext which is used throughout nodes' activation.
+     * As we set collectCommands to true, it would not create connections to workers.
+     * Instead it would collect and return sync commands to be sent to workers.
+     */
+    bool collectCommands = true;
+    bool nodesAddedInSameTransaction = false;
+    MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(dummyWorkerNode),
+                                                             collectCommands,
+                                                             nodesAddedInSameTransaction);
 
-    List *activateNodeCommandList = NIL;
+    ActivateNodeList(context);
+
+    List *activateNodeCommandList = context->collectedCommands;
     int activateNodeCommandIndex = 0;
     Oid ddlCommandTypeId = TEXTOID;
 
-    activateNodeCommandList = list_concat(activateNodeCommandList,
-                                          updateLocalGroupCommand);
-    activateNodeCommandList = list_concat(activateNodeCommandList, syncDistObjCommands);
-    activateNodeCommandList = list_concat(activateNodeCommandList, dropSnapshotCommands);
-    activateNodeCommandList = list_concat(activateNodeCommandList,
-                                          createSnapshotCommands);
-    activateNodeCommandList = list_concat(activateNodeCommandList,
-                                          pgDistTableMetadataSyncCommands);
-
     int activateNodeCommandCount = list_length(activateNodeCommandList);
     Datum *activateNodeCommandDatumArray = palloc0(activateNodeCommandCount *
                                                    sizeof(Datum));
@ -147,6 +147,26 @@ shard_placement_rebalance_array(PG_FUNCTION_ARGS)
         shardPlacementList = SortList(shardPlacementList, CompareShardPlacements);
         shardPlacementListList = lappend(shardPlacementListList, shardPlacementList);
 
+    List *unbalancedShards = NIL;
+    ListCell *shardPlacementListCell = NULL;
+    foreach(shardPlacementListCell, shardPlacementListList)
+    {
+        List *placementList = (List *) lfirst(shardPlacementListCell);
+
+        if (list_length(placementList) < list_length(workerNodeList))
+        {
+            unbalancedShards = list_concat(unbalancedShards,
+                                           placementList);
+            shardPlacementListList = foreach_delete_current(shardPlacementListList,
+                                                            shardPlacementListCell);
+        }
+    }
+
+    if (list_length(unbalancedShards) > 0)
+    {
+        shardPlacementListList = lappend(shardPlacementListList, unbalancedShards);
+    }
+
     rebalancePlanFunctions.context = &context;
 
     /* sort the lists to make the function more deterministic */
@ -1270,23 +1270,6 @@ MyBackendGotCancelledDueToDeadlock(bool clearState)
 }
 
 
-/*
- * MyBackendIsInDisributedTransaction returns true if MyBackendData
- * is in a distributed transaction.
- */
-bool
-MyBackendIsInDisributedTransaction(void)
-{
-    /* backend might not have used citus yet and thus not initialized backend data */
-    if (!MyBackendData)
-    {
-        return false;
-    }
-
-    return IsInDistributedTransaction(MyBackendData);
-}
-
-
 /*
  * ActiveDistributedTransactionNumbers returns a list of pointers to
  * transaction numbers of distributed transactions that are in progress

@ -1452,6 +1435,21 @@ IsExternalClientBackend(void)
 }
 
 
+/*
+ * IsCitusShardTransferBackend returns true if we are in a backend that citus
+ * rebalancer initiated.
+ */
+bool
+IsCitusShardTransferBackend(void)
+{
+    int prefixLength = strlen(CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX);
+
+    return strncmp(application_name,
+                   CITUS_SHARD_TRANSFER_APPLICATION_NAME_PREFIX,
+                   prefixLength) == 0;
+}
+
+
 /*
  * DetermineCitusBackendType determines the type of backend based on the application_name.
  */
@ -195,7 +195,7 @@ RecordRelationAccessIfNonDistTable(Oid relationId, ShardPlacementAccessType acce
      * recursively calling RecordRelationAccessBase(), so be careful about
      * removing this check.
      */
-    if (!IsCitusTableType(relationId, CITUS_TABLE_WITH_NO_DIST_KEY))
+    if (IsCitusTable(relationId) && HasDistributionKey(relationId))
     {
         return;
     }

@ -732,8 +732,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access
 
     CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
 
-    if (!(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY) &&
-          cacheEntry->referencingRelationsViaForeignKey != NIL))
+    if (HasDistributionKeyCacheEntry(cacheEntry) ||
+        cacheEntry->referencingRelationsViaForeignKey == NIL)
     {
         return;
     }

@ -931,7 +931,7 @@ HoldsConflictingLockWithReferencedRelations(Oid relationId, ShardPlacementAccess
          * We're only interested in foreign keys to reference tables and citus
          * local tables.
          */
-        if (!IsCitusTableType(referencedRelation, CITUS_TABLE_WITH_NO_DIST_KEY))
+        if (IsCitusTable(referencedRelation) && HasDistributionKey(referencedRelation))
         {
             continue;
         }

@ -993,7 +993,7 @@ HoldsConflictingLockWithReferencingRelations(Oid relationId, ShardPlacementAcces
     CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
     bool holdsConflictingLocks = false;
 
-    Assert(IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY));
+    Assert(!HasDistributionKeyCacheEntry(cacheEntry));
 
     Oid referencingRelation = InvalidOid;
     foreach_oid(referencingRelation, cacheEntry->referencingRelationsViaForeignKey)
@ -34,6 +34,7 @@
 #include "distributed/multi_logical_replication.h"
 #include "distributed/multi_explain.h"
 #include "distributed/repartition_join_execution.h"
+#include "distributed/replication_origin_session_utils.h"
 #include "distributed/transaction_management.h"
 #include "distributed/placement_connection.h"
 #include "distributed/relation_access_tracking.h"

@ -391,6 +392,9 @@ CoordinatedTransactionCallback(XactEvent event, void *arg)
             ResetGlobalVariables();
             ResetRelationAccessHash();
 
+            /* Reset any local replication origin session since transaction has been aborted.*/
+            ResetReplicationOriginLocalSession();
+
             /* empty the CitusXactCallbackContext to ensure we're not leaking memory */
             MemoryContextReset(CitusXactCallbackContext);

@ -715,6 +719,8 @@ CoordinatedSubTransactionCallback(SubXactEvent event, SubTransactionId subId,
                 SetCreateCitusTransactionLevel(0);
             }
 
+            /* Reset any local replication origin session since subtransaction has been aborted.*/
+            ResetReplicationOriginLocalSession();
             MemoryContextSwitchTo(previousContext);
 
             break;
@ -374,6 +374,54 @@ SendCommandListToWorkerOutsideTransactionWithConnection(MultiConnection *workerC
 }
 
 
+/*
+ * SendCommandListToWorkerListWithBareConnections sends the command list
+ * over the specified bare connections. This function is mainly useful to
+ * avoid opening and closing connections excessively by allowing reusing
+ * connections to send multiple separate bare commands. The function
+ * raises an error if any of the queries fail.
+ */
+void
+SendCommandListToWorkerListWithBareConnections(List *workerConnectionList,
+                                               List *commandList)
+{
+    Assert(!InCoordinatedTransaction());
+    Assert(!GetCoordinatedTransactionShouldUse2PC());
+
+    if (list_length(commandList) == 0 || list_length(workerConnectionList) == 0)
+    {
+        /* nothing to do */
+        return;
+    }
+
+    /*
+     * In order to avoid round-trips per query in queryStringList,
+     * we join the string and send as a single command. Also,
+     * if there is only a single command, avoid additional call to
+     * StringJoin given that some strings can be quite large.
+     */
+    char *stringToSend = (list_length(commandList) == 1) ?
+                         linitial(commandList) : StringJoin(commandList, ';');
+
+    /* send commands in parallel */
+    MultiConnection *connection = NULL;
+    foreach_ptr(connection, workerConnectionList)
+    {
+        int querySent = SendRemoteCommand(connection, stringToSend);
+        if (querySent == 0)
+        {
+            ReportConnectionError(connection, ERROR);
+        }
+    }
+
+    bool failOnError = true;
+    foreach_ptr(connection, workerConnectionList)
+    {
+        ClearResults(connection, failOnError);
+    }
+}
+
+
 /*
  * SendCommandListToWorkerInCoordinatedTransaction opens connection to the node
  * with the given nodeName and nodePort. The commands are sent as part of the

@ -390,6 +438,8 @@ SendMetadataCommandListToWorkerListInCoordinatedTransaction(List *workerNodeList
         return;
     }
 
+    ErrorIfAnyMetadataNodeOutOfSync(workerNodeList);
+
     UseCoordinatedTransaction();
 
     List *connectionList = NIL;
@ -442,8 +442,7 @@ ShardsIntervalsEqual(ShardInterval *leftShardInterval, ShardInterval *rightShard
     {
         return HashPartitionedShardIntervalsEqual(leftShardInterval, rightShardInterval);
     }
-    else if (IsCitusTableType(leftShardInterval->relationId,
-                              CITUS_TABLE_WITH_NO_DIST_KEY))
+    else if (!HasDistributionKey(leftShardInterval->relationId))
     {
         /*
          * Reference tables has only a single shard and all reference tables
@ -503,12 +503,11 @@ GetReferenceTableColocationId()
 
 
 /*
- * DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over
- * list of reference and replicated hash distributed tables and deletes
- * all placements from pg_dist_placement table for given group.
+ * GetAllReplicatedTableList returns all tables which has replicated placements.
+ * i.e. (all reference tables) + (distributed tables with more than 1 placements)
  */
-void
-DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
+List *
+GetAllReplicatedTableList(void)
 {
     List *referenceTableList = CitusTableTypeIdList(REFERENCE_TABLE);
     List *replicatedMetadataSyncedDistributedTableList =

@ -517,13 +516,25 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
     List *replicatedTableList =
         list_concat(referenceTableList, replicatedMetadataSyncedDistributedTableList);
 
-    /* if there are no reference tables, we do not need to do anything */
+    return replicatedTableList;
+}
+
+
+/*
+ * ReplicatedPlacementsForNodeGroup filters all replicated placements for given
+ * node group id.
+ */
+List *
+ReplicatedPlacementsForNodeGroup(int32 groupId)
+{
+    List *replicatedTableList = GetAllReplicatedTableList();
+
     if (list_length(replicatedTableList) == 0)
     {
-        return;
+        return NIL;
     }
 
-    StringInfo deletePlacementCommand = makeStringInfo();
+    List *replicatedPlacementsForNodeGroup = NIL;
     Oid replicatedTableId = InvalidOid;
     foreach_oid(replicatedTableId, replicatedTableList)
     {

@ -538,25 +549,104 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
             continue;
         }
 
+        replicatedPlacementsForNodeGroup = list_concat(replicatedPlacementsForNodeGroup,
+                                                       placements);
+    }
+
+    return replicatedPlacementsForNodeGroup;
+}
+
+
+/*
+ * DeleteShardPlacementCommand returns a command for deleting given placement from
+ * metadata.
+ */
+char *
+DeleteShardPlacementCommand(uint64 placementId)
+{
+    StringInfo deletePlacementCommand = makeStringInfo();
+    appendStringInfo(deletePlacementCommand,
+                     "DELETE FROM pg_catalog.pg_dist_placement "
+                     "WHERE placementid = " UINT64_FORMAT, placementId);
+    return deletePlacementCommand->data;
+}
+
+
+/*
+ * DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over
+ * list of reference and replicated hash distributed tables and deletes
+ * all placements from pg_dist_placement table for given group.
+ */
+void
+DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly)
+{
+    List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId);
+
+    /* if there are no replicated tables for the group, we do not need to do anything */
+    if (list_length(replicatedPlacementListForGroup) == 0)
+    {
+        return;
+    }
+
     GroupShardPlacement *placement = NULL;
-    foreach_ptr(placement, placements)
+    foreach_ptr(placement, replicatedPlacementListForGroup)
     {
         LockShardDistributionMetadata(placement->shardId, ExclusiveLock);
 
+        if (!localOnly)
+        {
+            char *deletePlacementCommand =
+                DeleteShardPlacementCommand(placement->placementId);
+
+            SendCommandToWorkersWithMetadata(deletePlacementCommand);
+        }
+
         DeleteShardPlacementRow(placement->placementId);
+    }
+}
+
+
+/*
+ * DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext does the same as
+ * DeleteAllReplicatedTablePlacementsFromNodeGroup except it uses metadataSyncContext for
+ * connections.
+ */
+void
+DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext(
+    MetadataSyncContext *context, int32 groupId, bool localOnly)
+{
+    List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId);
+
+    /* if there are no replicated tables for the group, we do not need to do anything */
+    if (list_length(replicatedPlacementListForGroup) == 0)
+    {
+        return;
+    }
+
+    MemoryContext oldContext = MemoryContextSwitchTo(context->context);
+    GroupShardPlacement *placement = NULL;
+    foreach_ptr(placement, replicatedPlacementListForGroup)
+    {
+        LockShardDistributionMetadata(placement->shardId, ExclusiveLock);
 
         if (!localOnly)
         {
-            resetStringInfo(deletePlacementCommand);
-            appendStringInfo(deletePlacementCommand,
-                             "DELETE FROM pg_catalog.pg_dist_placement "
-                             "WHERE placementid = " UINT64_FORMAT,
-                             placement->placementId);
+            char *deletePlacementCommand =
+                DeleteShardPlacementCommand(placement->placementId);
 
-            SendCommandToWorkersWithMetadata(deletePlacementCommand->data);
+            SendOrCollectCommandListToMetadataNodes(context,
+                                                    list_make1(deletePlacementCommand));
         }
 
+        /* do not execute local transaction if we collect commands */
+        if (!MetadataSyncCollectsCommands(context))
+        {
+            DeleteShardPlacementRow(placement->placementId);
+        }
+
+        ResetMetadataSyncMemoryContext(context);
     }
+
+    MemoryContextSwitchTo(oldContext);
 }
@ -0,0 +1,239 @@
+/*-------------------------------------------------------------------------
+ *
+ * replication_origin_session_utils.c
+ *   Functions for managing replication origin session.
+ *
+ * Copyright (c) Citus Data, Inc.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "distributed/replication_origin_session_utils.h"
+#include "distributed/remote_commands.h"
+#include "distributed/metadata_cache.h"
+#include "utils/builtins.h"
+#include "miscadmin.h"
+
+static bool IsRemoteReplicationOriginSessionSetup(MultiConnection *connection);
+
+static void SetupMemoryContextResetReplicationOriginHandler(void);
+
+static void SetupReplicationOriginSessionHelper(bool isContexResetSetupNeeded);
+
+static inline bool IsLocalReplicationOriginSessionActive(void);
+
+PG_FUNCTION_INFO_V1(citus_internal_start_replication_origin_tracking);
+PG_FUNCTION_INFO_V1(citus_internal_stop_replication_origin_tracking);
+PG_FUNCTION_INFO_V1(citus_internal_is_replication_origin_tracking_active);
+
+/*
+ * This variable is used to remember the replication origin id of the current session
+ * before resetting it to DoNotReplicateId in SetupReplicationOriginLocalSession.
+ */
+static RepOriginId OriginalOriginId = InvalidRepOriginId;
+
+/*
+ * Setting that controls whether replication origin tracking is enabled
+ */
+bool EnableChangeDataCapture = false;
+
+
+/* citus_internal_start_replication_origin_tracking starts a new replication origin session
+ * in the local node. This function is used to avoid publishing the WAL records to the
+ * replication slot by setting replication origin to DoNotReplicateId in WAL records.
+ * It remembers the previous replication origin for the current session which will be
+ * used to reset the replication origin to the previous value when the session ends.
+ */
+Datum
+citus_internal_start_replication_origin_tracking(PG_FUNCTION_ARGS)
+{
+    if (!EnableChangeDataCapture)
+    {
+        PG_RETURN_VOID();
+    }
+    SetupReplicationOriginSessionHelper(false);
+    PG_RETURN_VOID();
+}
+
+
+/* citus_internal_stop_replication_origin_tracking ends the current replication origin session
+ * in the local node. This function is used to reset the replication origin to the
+ * earlier value of replication origin.
+ */
+Datum
+citus_internal_stop_replication_origin_tracking(PG_FUNCTION_ARGS)
+{
+    ResetReplicationOriginLocalSession();
+    PG_RETURN_VOID();
+}
+
+
+/* citus_internal_is_replication_origin_tracking_active checks if the current replication origin
+ * session is active in the local node.
+ */
+Datum
+citus_internal_is_replication_origin_tracking_active(PG_FUNCTION_ARGS)
+{
+    bool result = IsLocalReplicationOriginSessionActive();
+    PG_RETURN_BOOL(result);
+}
+
+
+/* IsLocalReplicationOriginSessionActive checks if the current replication origin
+ * session is active in the local node.
+ */
+inline bool
+IsLocalReplicationOriginSessionActive(void)
+{
+    return (replorigin_session_origin == DoNotReplicateId);
+}
+
+
+/*
+ * SetupMemoryContextResetReplicationOriginHandler registers a callback function
+ * that resets the replication origin session in case of any error for the current
+ * memory context.
+ */
+static void
+SetupMemoryContextResetReplicationOriginHandler()
+{
+    MemoryContextCallback *replicationOriginResetCallback = palloc0(
+        sizeof(MemoryContextCallback));
+    replicationOriginResetCallback->func =
+        ResetReplicationOriginLocalSessionCallbackHandler;
+    replicationOriginResetCallback->arg = NULL;
+    MemoryContextRegisterResetCallback(CurrentMemoryContext,
+                                       replicationOriginResetCallback);
+}
+
+
+/*
+ * SetupReplicationOriginSessionHelper sets up a new replication origin session in a
+ * local session. It takes an argument isContexResetSetupNeeded to decide whether
+ * to register a callback function that resets the replication origin session in case
+ * of any error for the current memory context.
+ */
+static void
+SetupReplicationOriginSessionHelper(bool isContexResetSetupNeeded)
+{
+    if (!EnableChangeDataCapture)
+    {
+        return;
+    }
+    OriginalOriginId = replorigin_session_origin;
+    replorigin_session_origin = DoNotReplicateId;
+    if (isContexResetSetupNeeded)
+    {
+        SetupMemoryContextResetReplicationOriginHandler();
+    }
+}
+
+
+/*
+ * SetupReplicationOriginLocalSession sets up a new replication origin session in a
+ * local session.
+ */
+void
+SetupReplicationOriginLocalSession()
+{
+    SetupReplicationOriginSessionHelper(true);
+}
+
+
+/*
+ * ResetReplicationOriginLocalSession resets the replication origin session in a
+ * local node.
+ */
+void
+ResetReplicationOriginLocalSession(void)
+{
+    if (replorigin_session_origin != DoNotReplicateId)
+    {
+        return;
+    }
+
+    replorigin_session_origin = OriginalOriginId;
+}
+
+
+/*
+ * ResetReplicationOriginLocalSessionCallbackHandler is a callback function that
+ * resets the replication origin session in a local node. This is used to register
+ * with MemoryContextRegisterResetCallback to reset the replication origin session
+ * in case of any error for the given memory context.
+ */
+void
+ResetReplicationOriginLocalSessionCallbackHandler(void *arg)
+{
+    ResetReplicationOriginLocalSession();
+}
+
+
+/*
+ * SetupReplicationOriginRemoteSession sets up a new replication origin session in a
+ * remote session. The identifier is used to create a unique replication origin name
+ * for the session in the remote node.
+ */
+void
+SetupReplicationOriginRemoteSession(MultiConnection *connection)
+{
+    if (!EnableChangeDataCapture)
+    {
+        return;
+    }
+    if (connection != NULL && !IsRemoteReplicationOriginSessionSetup(connection))
+    {
+        StringInfo replicationOriginSessionSetupQuery = makeStringInfo();
+        appendStringInfo(replicationOriginSessionSetupQuery,
+                         "select pg_catalog.citus_internal_start_replication_origin_tracking();");
+        ExecuteCriticalRemoteCommand(connection,
+                                     replicationOriginSessionSetupQuery->data);
+        connection->isReplicationOriginSessionSetup = true;
+    }
+}
+
+
+/*
+ * ResetReplicationOriginRemoteSession resets the replication origin session in a
+ * remote node.
+ */
+void
+ResetReplicationOriginRemoteSession(MultiConnection *connection)
+{
+    if (connection != NULL && connection->isReplicationOriginSessionSetup)
+    {
+        StringInfo replicationOriginSessionResetQuery = makeStringInfo();
+        appendStringInfo(replicationOriginSessionResetQuery,
+                         "select pg_catalog.citus_internal_stop_replication_origin_tracking();");
+        ExecuteCriticalRemoteCommand(connection,
+                                     replicationOriginSessionResetQuery->data);
+        connection->isReplicationOriginSessionSetup = false;
+    }
+}
+
+
+/*
+ * IsRemoteReplicationOriginSessionSetup checks if the replication origin is setup
+ * already in the remote session by calling the UDF
+ * citus_internal_is_replication_origin_tracking_active(). This is also remembered
+ * in the connection object to avoid calling the UDF again next time.
+ */
+static bool
+IsRemoteReplicationOriginSessionSetup(MultiConnection *connection)
+{
+    if (connection->isReplicationOriginSessionSetup)
+    {
+        return true;
+    }
+
+    StringInfo isReplicationOriginSessionSetupQuery = makeStringInfo();
+    appendStringInfo(isReplicationOriginSessionSetupQuery,
+                     "SELECT pg_catalog.citus_internal_is_replication_origin_tracking_active()");
+    bool result =
+        ExecuteRemoteCommandAndCheckResult(connection,
+                                           isReplicationOriginSessionSetupQuery->data,
+                                           "t");
+
+    connection->isReplicationOriginSessionSetup = result;
+    return result;
+}
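
Both the local-session setup and the start UDF are gated on EnableChangeDataCapture, so the tracking calls silently no-op while the GUC is off. A sketch that makes the gating visible:

    SET citus.enable_change_data_capture TO off;
    SELECT pg_catalog.citus_internal_start_replication_origin_tracking();
    SELECT pg_catalog.citus_internal_is_replication_origin_tracking_active();  -- expect f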
@@ -503,45 +503,6 @@ SetLocktagForShardDistributionMetadata(int64 shardId, LOCKTAG *tag)
 	}
 }
 
-
-/*
- * LockPlacementCleanup takes an exclusive lock to ensure that only one process
- * can cleanup placements at the same time.
- */
-void
-LockPlacementCleanup(void)
-{
-	LOCKTAG tag;
-	const bool sessionLock = false;
-	const bool dontWait = false;
-
-	/* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE.
-	 * This will change as we add support for parallel moves.
-	 */
-	SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE);
-	(void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
-}
-
-
-/*
- * TryLockPlacementCleanup takes an exclusive lock to ensure that only one
- * process can cleanup placements at the same time.
- */
-bool
-TryLockPlacementCleanup(void)
-{
-	LOCKTAG tag;
-	const bool sessionLock = false;
-	const bool dontWait = true;
-
-	/* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE.
-	 * This will change as we add support for parallel moves.
-	 */
-	SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE);
-	bool lockAcquired = LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
-	return lockAcquired;
-}
-
-
 /*
  * LockReferencedReferenceShardDistributionMetadata acquires shard distribution
  * metadata locks with the given lock mode on the reference tables which has a
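The only difference between the two deleted helpers is the dontWait flag passed to LockAcquire. A minimal sketch of the non-blocking pattern, assuming the helper keeps this shape wherever it moves:

    /* illustration: skip the round if another backend already owns the cleanup lock */
    if (!TryLockPlacementCleanup())
    {
        /* dontWait = true makes LockAcquire return LOCKACQUIRE_NOT_AVAIL instead of blocking */
        ereport(DEBUG1, (errmsg("another backend is already cleaning up placements")));
        return;
    }

    /* we now hold the exclusive CITUS_SHARD_MOVE lock; safe to clean up placements */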
@@ -223,8 +223,7 @@ ShardIndex(ShardInterval *shardInterval)
 	 * currently it is not required.
 	 */
 	if (!IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) &&
-		!IsCitusTableTypeCacheEntry(
-			cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+		HasDistributionKeyCacheEntry(cacheEntry))
 	{
 		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						errmsg("finding index of a given shard is only supported for "
@@ -233,7 +232,7 @@ ShardIndex(ShardInterval *shardInterval)
 	}
 
 	/* short-circuit for reference tables */
-	if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+	if (!HasDistributionKeyCacheEntry(cacheEntry))
 	{
 		/*
 		 * Reference tables and citus local tables have only a single shard,
@@ -333,7 +332,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry)
 			shardIndex = CalculateUniformHashRangeIndex(hashedValue, shardCount);
 		}
 	}
-	else if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
+	else if (!HasDistributionKeyCacheEntry(cacheEntry))
 	{
 		/* non-distributed tables have a single shard, all values mapped to that shard */
 		Assert(shardCount == 1);
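The two ShardIndex hunks and the FindShardIntervalIndex hunk above are mechanical rewrites: assuming CITUS_TABLE_WITH_NO_DIST_KEY marked exactly the tables without a distribution column, the old type check and the new predicate are logical complements, as the sketch below spells out.

    /* sketch of the assumed equivalence; not part of the diff */
    static inline bool
    TableHasNoDistKeySketch(CitusTableCacheEntry *cacheEntry)
    {
        /* old: IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY) */
        /* new: */
        return !HasDistributionKeyCacheEntry(cacheEntry);
    }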
@@ -35,8 +35,22 @@
 #include "distributed/worker_create_or_replace.h"
 #include "distributed/worker_protocol.h"
 
+
+/*
+ * OnCollisionAction describes what to do when the created object
+ * and existing object do not match.
+ */
+typedef enum OnCollisionAction
+{
+	ON_COLLISION_RENAME,
+	ON_COLLISION_DROP
+} OnCollisionAction;
+
+
 static List * CreateStmtListByObjectAddress(const ObjectAddress *address);
 static bool CompareStringList(List *list1, List *list2);
+static OnCollisionAction GetOnCollisionAction(const ObjectAddress *address);
 
 PG_FUNCTION_INFO_V1(worker_create_or_replace_object);
 PG_FUNCTION_INFO_V1(worker_create_or_replace_object_array);
@@ -192,7 +206,8 @@ WorkerCreateOrReplaceObject(List *sqlStatements)
 	/*
 	 * Object with name from statement is already found locally, check if states are
 	 * identical. If objects differ we will rename the old object (non-destructively)
-	 * as to make room to create the new object according to the spec sent.
+	 * or drop it (if safe) as to make room to create the new object according to the
+	 * spec sent.
 	 */
 
 	/*
@@ -213,11 +228,22 @@ WorkerCreateOrReplaceObject(List *sqlStatements)
 			return false;
 		}
 
-		char *newName = GenerateBackupNameForCollision(address);
+		Node *utilityStmt = NULL;
 
-		RenameStmt *renameStmt = CreateRenameStatement(address, newName);
-		const char *sqlRenameStmt = DeparseTreeNode((Node *) renameStmt);
-		ProcessUtilityParseTree((Node *) renameStmt, sqlRenameStmt,
+		if (GetOnCollisionAction(address) == ON_COLLISION_DROP)
+		{
+			/* drop the existing object */
+			utilityStmt = (Node *) CreateDropStmt(address);
+		}
+		else
+		{
+			/* rename the existing object */
+			char *newName = GenerateBackupNameForCollision(address);
+			utilityStmt = (Node *) CreateRenameStatement(address, newName);
+		}
+
+		const char *commandString = DeparseTreeNode(utilityStmt);
+		ProcessUtilityParseTree(utilityStmt, commandString,
 								PROCESS_UTILITY_QUERY,
 								NULL, None_Receiver, NULL);
 	}
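Condensed, the new collision path builds one of two utility statements and executes whichever applies; a sketch using only names that appear in this diff:

    /* condensed restatement of the hunk above, for readability */
    Node *utilityStmt =
        (GetOnCollisionAction(address) == ON_COLLISION_DROP)
        ? (Node *) CreateDropStmt(address)          /* e.g. publications */
        : (Node *) CreateRenameStatement(address,
                                         GenerateBackupNameForCollision(address));

    ProcessUtilityParseTree(utilityStmt, DeparseTreeNode(utilityStmt),
                            PROCESS_UTILITY_QUERY, NULL, None_Receiver, NULL);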
@@ -286,6 +312,11 @@ CreateStmtListByObjectAddress(const ObjectAddress *address)
 			return list_make1(GetFunctionDDLCommand(address->objectId, false));
 		}
 
+		case OCLASS_PUBLICATION:
+		{
+			return list_make1(CreatePublicationDDLCommand(address->objectId));
+		}
+
 		case OCLASS_TSCONFIG:
 		{
 			List *stmts = GetCreateTextSearchConfigStatements(address);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GetOnCollisionAction decides what to do when the object already exists.
|
||||||
|
*/
|
||||||
|
static OnCollisionAction
|
||||||
|
GetOnCollisionAction(const ObjectAddress *address)
|
||||||
|
{
|
||||||
|
switch (getObjectClass(address))
|
||||||
|
{
|
||||||
|
case OCLASS_PUBLICATION:
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We prefer to drop publications because they can be
|
||||||
|
* harmful (cause update/delete failures) and are relatively
|
||||||
|
* safe to drop.
|
||||||
|
*/
|
||||||
|
return ON_COLLISION_DROP;
|
||||||
|
}
|
||||||
|
|
||||||
|
case OCLASS_COLLATION:
|
||||||
|
case OCLASS_PROC:
|
||||||
|
case OCLASS_TSCONFIG:
|
||||||
|
case OCLASS_TSDICT:
|
||||||
|
case OCLASS_TYPE:
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
return ON_COLLISION_RENAME;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GenerateBackupNameForCollision calculate a backup name for a given object by its
|
* GenerateBackupNameForCollision calculate a backup name for a given object by its
|
||||||
* address. This name should be used when renaming an existing object before creating the
|
* address. This name should be used when renaming an existing object before creating the
|
||||||
|
@@ -362,6 +424,64 @@ GenerateBackupNameForCollision(const ObjectAddress *address)
 }
 
 
+/*
+ * CreateDropPublicationStmt creates a DROP PUBLICATION statement for the
+ * publication at the given address.
+ */
+static DropStmt *
+CreateDropPublicationStmt(const ObjectAddress *address)
+{
+	Assert(address->classId == PublicationRelationId);
+
+	DropStmt *dropStmt = makeNode(DropStmt);
+	dropStmt->removeType = OBJECT_PUBLICATION;
+	dropStmt->behavior = DROP_RESTRICT;
+
+	HeapTuple publicationTuple =
+		SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(address->objectId));
+
+	if (!HeapTupleIsValid(publicationTuple))
+	{
+		ereport(ERROR, (errmsg("cannot find publication with oid: %d",
+							   address->objectId)));
+	}
+
+	Form_pg_publication publicationForm =
+		(Form_pg_publication) GETSTRUCT(publicationTuple);
+
+	char *publicationName = NameStr(publicationForm->pubname);
+	dropStmt->objects = list_make1(makeString(publicationName));
+
+	ReleaseSysCache(publicationTuple);
+
+	return dropStmt;
+}
+
+
+/*
+ * CreateDropStmt returns a DROP statement for the given object.
+ */
+DropStmt *
+CreateDropStmt(const ObjectAddress *address)
+{
+	switch (getObjectClass(address))
+	{
+		case OCLASS_PUBLICATION:
+		{
+			return CreateDropPublicationStmt(address);
+		}
+
+		default:
+		{
+			break;
+		}
+	}
+
+	ereport(ERROR, (errmsg("unsupported object to construct a drop statement"),
+					errdetail("unable to generate a parsetree for the drop")));
+}
+
+
 /*
  * CreateRenameTypeStmt creates a rename statement for a type based on its ObjectAddress.
  * The rename statement will rename the existing object on its address to the value
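For a publication named my_pub (a hypothetical name), the DropStmt built above is expected to deparse to roughly DROP PUBLICATION my_pub, with DROP_RESTRICT keeping the statement from cascading to dependent objects. A sketch of how the collision path consumes it:

    /* illustration only; mirrors the usage earlier in this diff */
    DropStmt *dropStmt = CreateDropStmt(address);   /* errors out on unsupported classes */
    const char *dropCommand = DeparseTreeNode((Node *) dropStmt);

    ProcessUtilityParseTree((Node *) dropStmt, dropCommand,
                            PROCESS_UTILITY_QUERY, NULL, None_Receiver, NULL);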
@@ -70,6 +70,7 @@ static void AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequence
 PG_FUNCTION_INFO_V1(worker_apply_shard_ddl_command);
 PG_FUNCTION_INFO_V1(worker_apply_inter_shard_ddl_command);
 PG_FUNCTION_INFO_V1(worker_apply_sequence_command);
+PG_FUNCTION_INFO_V1(worker_adjust_identity_column_seq_ranges);
 PG_FUNCTION_INFO_V1(worker_append_table_to_shard);
 PG_FUNCTION_INFO_V1(worker_nextval);
 
@@ -133,6 +134,60 @@ worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS)
 }
 
 
+/*
+ * worker_adjust_identity_column_seq_ranges takes a table oid and runs an
+ * ALTER SEQUENCE statement for each identity column, adjusting the minvalue
+ * and maxvalue of the sequence owned by the identity column so that the
+ * sequence produces globally unique values.
+ *
+ * We use the table oid instead of a sequence name to avoid any potential
+ * conflicts between sequences of different tables. This way, we can safely
+ * iterate through the identity columns of a specific table without any
+ * issues. While this may introduce a small amount of business logic to the
+ * workers, it is a much safer approach overall.
+ */
+Datum
+worker_adjust_identity_column_seq_ranges(PG_FUNCTION_ARGS)
+{
+	CheckCitusVersion(ERROR);
+
+	Oid tableRelationId = PG_GETARG_OID(0);
+
+	EnsureTableOwner(tableRelationId);
+
+	Relation tableRelation = relation_open(tableRelationId, AccessShareLock);
+	TupleDesc tableTupleDesc = RelationGetDescr(tableRelation);
+
+	bool missingSequenceOk = false;
+
+	for (int attributeIndex = 0; attributeIndex < tableTupleDesc->natts;
+		 attributeIndex++)
+	{
+		Form_pg_attribute attributeForm = TupleDescAttr(tableTupleDesc,
+														attributeIndex);
+
+		/* skip dropped columns */
+		if (attributeForm->attisdropped)
+		{
+			continue;
+		}
+
+		if (attributeForm->attidentity)
+		{
+			Oid sequenceOid = getIdentitySequence(tableRelationId,
+												  attributeForm->attnum,
+												  missingSequenceOk);
+
+			Oid sequenceSchemaOid = get_rel_namespace(sequenceOid);
+			char *sequenceSchemaName = get_namespace_name(sequenceSchemaOid);
+			char *sequenceName = get_rel_name(sequenceOid);
+			Oid sequenceTypeId = pg_get_sequencedef(sequenceOid)->seqtypid;
+
+			AlterSequenceMinMax(sequenceOid, sequenceSchemaName, sequenceName,
+								sequenceTypeId);
+		}
+	}
+
+	relation_close(tableRelation, NoLock);
+
+	PG_RETURN_VOID();
+}
+
+
 /*
  * worker_apply_sequence_command takes a CREATE SEQUENCE command string, runs the
  * CREATE SEQUENCE command then creates and runs an ALTER SEQUENCE statement
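AlterSequenceMinMax is only declared in this diff, so its body is not visible here. Below is a hedged sketch of the kind of statement it presumably issues for each identity sequence; the helper name ExecuteQueryViaSPI and the range arguments are assumptions, and the real node-specific range computation is not shown in this diff.

    /* sketch only: not the actual AlterSequenceMinMax implementation */
    static void
    AlterSequenceMinMaxSketch(char *schemaName, char *sequenceName,
                              int64 rangeMinValue, int64 rangeMaxValue)
    {
        StringInfo alterCommand = makeStringInfo();
        appendStringInfo(alterCommand,
                         "ALTER SEQUENCE %s.%s MINVALUE " INT64_FORMAT
                         " MAXVALUE " INT64_FORMAT " RESTART WITH " INT64_FORMAT,
                         quote_identifier(schemaName),
                         quote_identifier(sequenceName),
                         rangeMinValue, rangeMaxValue, rangeMinValue);

        /* assumed SPI helper; any utility-statement execution path would do */
        ExecuteQueryViaSPI(alterCommand->data, SPI_OK_UTILITY);
    }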
@@ -351,18 +351,17 @@ ShouldHideShardsInternal(void)
 			return false;
 		}
 	}
-	else if (MyBackendType != B_BACKEND)
+	else if (MyBackendType != B_BACKEND && MyBackendType != B_WAL_SENDER)
 	{
 		/*
 		 * We are aiming only to hide shards from client
 		 * backends or certain background workers (see above),
-		 * not backends like walsender or checkpointer.
 		 */
 		return false;
 	}
 
 	if (IsCitusInternalBackend() || IsRebalancerInternalBackend() ||
-		IsCitusRunCommandBackend())
+		IsCitusRunCommandBackend() || IsCitusShardTransferBackend())
 	{
 		/* we never hide shards from Citus */
 		return false;
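Net effect of the two changes above: WAL senders now go through the same shard-hiding decision as client backends, and shard-transfer backends join the Citus-internal backends that always see shards. A condensed (not verbatim) restatement of the resulting gate:

    /* condensed sketch; the original uses an else-if chain with more cases */
    if (MyBackendType != B_BACKEND && MyBackendType != B_WAL_SENDER)
    {
        return false;   /* e.g. checkpointer: never hide shards here */
    }

    if (IsCitusInternalBackend() || IsRebalancerInternalBackend() ||
        IsCitusRunCommandBackend() || IsCitusShardTransferBackend())
    {
        return false;   /* Citus's own connections always see shards */
    }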