Merge branch 'citusdata:main' into sqlancer-test-gha

pull/6697/head
Gokhan Gulbiz 2023-07-03 16:30:09 +03:00 committed by GitHub
commit 7cfaa592a4
565 changed files with 38958 additions and 16175 deletions


@ -6,19 +6,16 @@ orbs:
parameters:
image_suffix:
type: string
default: '-v3417e8d'
pg13_version:
type: string
default: '13.10'
default: '-vbab548a'
pg14_version:
type: string
default: '14.7'
default: '14.8'
pg15_version:
type: string
default: '15.2'
default: '15.3'
upgrade_pg_versions:
type: string
default: '13.10-14.7-15.2'
default: '14.8-15.3'
style_checker_tools_version:
type: string
default: '0.8.18'
@ -421,6 +418,66 @@ jobs:
- coverage:
flags: 'test_<< parameters.pg_major >>,upgrade'
test-query-generator:
description: Expects that the generated queries return the same results when run on distributed and local tables
parameters:
pg_major:
description: 'postgres major version'
type: integer
image:
description: 'docker image to use for the tests'
type: string
default: citus/failtester
image_tag:
description: 'docker image tag to use'
type: string
docker:
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
working_directory: /home/circleci/project
steps:
- checkout
- attach_workspace:
at: .
- install_extension:
pg_major: << parameters.pg_major >>
- configure
- enable_core
- run:
name: 'Run Test'
command: |
gosu circleci make -C src/test/regress check-query-generator
no_output_timeout: 5m
- run:
name: 'Show regressions'
command: |
find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" -exec cat {} +
lines=$(find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" | wc -l)
if [ $lines -ne 0 ]; then
exit 1
fi
when: on_fail
- run:
name: 'Copy logfiles'
command: |
mkdir src/test/regress/tmp_citus_test/logfiles
find src/test/regress/tmp_citus_test/ -name "logfile_*" -exec cp -t src/test/regress/tmp_citus_test/logfiles/ {} +
when: on_fail
- store_artifacts:
name: 'Save logfiles'
path: src/test/regress/tmp_citus_test/logfiles
- store_artifacts:
name: 'Save ddls'
path: src/test/regress/citus_tests/query_generator/out/ddls.sql
- store_artifacts:
name: 'Save dmls'
path: src/test/regress/citus_tests/query_generator/out/queries.sql
- store_artifacts:
name: 'Save diffs'
path: src/test/regress/citus_tests/query_generator/out/local_dist.diffs
- stack_trace
- coverage:
flags: 'test_<< parameters.pg_major >>,querygen'
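
For reference, the new job's test and regression-check steps boil down to the commands below. This is only a sketch of how the job could be reproduced in a local development checkout; the `gosu circleci` wrapper is specific to the CI image and is dropped here, and the make target and paths are taken verbatim from the job above.

```bash
# Run the query generator suite, mirroring the 'Run Test' step above.
make -C src/test/regress check-query-generator

# The generator leaves its artifacts under this directory:
# ddls.sql, queries.sql, and local_dist.diffs (only present on mismatches).
out_dir=src/test/regress/citus_tests/query_generator/out

# Show any regressions and fail if distributed and local results differ,
# mirroring the 'Show regressions' step above.
find "$out_dir" -name "local_dist.diffs" -exec cat {} +
if [ "$(find "$out_dir" -name 'local_dist.diffs' | wc -l)" -ne 0 ]; then
    exit 1
fi
```
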
test-citus:
description: Runs the common tests of citus
parameters:
@ -505,7 +562,7 @@ jobs:
check-merge-to-enterprise:
docker:
- image: citus/extbuilder:<< pipeline.parameters.pg13_version >>
- image: citus/extbuilder:<< pipeline.parameters.pg14_version >>
working_directory: /home/circleci/project
steps:
- checkout
@ -657,10 +714,6 @@ workflows:
when:
not: << pipeline.parameters.flaky_test >>
jobs:
- build:
name: build-13
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
- build:
name: build-14
pg_major: 14
@ -673,79 +726,6 @@ workflows:
- check-style
- check-sql-snapshots
- test-citus: &test-citus-13
name: 'test-13_check-multi'
make: check-multi
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
requires: [build-13]
- test-citus:
<<: *test-citus-13
name: 'test-13_check-multi-1'
make: check-multi-1
- test-citus:
<<: *test-citus-13
name: 'test-13_check-mx'
make: check-multi-mx
- test-citus:
<<: *test-citus-13
name: 'test-13_check-vanilla'
make: check-vanilla
- test-citus:
<<: *test-citus-13
name: 'test-13_check-isolation'
make: check-isolation
- test-citus:
<<: *test-citus-13
name: 'test-13_check-operations'
make: check-operations
- test-citus:
<<: *test-citus-13
name: 'test-13_check-follower-cluster'
make: check-follower-cluster
- test-citus:
<<: *test-citus-13
name: 'test-13_check-columnar'
make: check-columnar
- test-citus:
<<: *test-citus-13
name: 'test-13_check-columnar-isolation'
make: check-columnar-isolation
- test-citus:
<<: *test-citus-13
name: 'test-13_check-failure'
image: citus/failtester
make: check-failure
- test-citus:
<<: *test-citus-13
name: 'test-13_check-enterprise'
make: check-enterprise
- test-citus:
<<: *test-citus-13
name: 'test-13_check-enterprise-isolation'
make: check-enterprise-isolation
- test-citus:
<<: *test-citus-13
name: 'test-13_check-enterprise-isolation-logicalrep-1'
make: check-enterprise-isolation-logicalrep-1
- test-citus:
<<: *test-citus-13
name: 'test-13_check-enterprise-isolation-logicalrep-2'
make: check-enterprise-isolation-logicalrep-2
- test-citus:
<<: *test-citus-13
name: 'test-13_check-enterprise-isolation-logicalrep-3'
make: check-enterprise-isolation-logicalrep-3
- test-citus:
<<: *test-citus-13
name: 'test-13_check-enterprise-failure'
image: citus/failtester
make: check-enterprise-failure
- test-citus:
<<: *test-citus-13
name: 'test-13_check-split'
make: check-split
- test-citus: &test-citus-14
name: 'test-14_check-split'
make: check-split
@ -892,12 +872,6 @@ workflows:
image: citus/failtester
make: check-failure
- test-pytest:
name: 'test-13_pytest'
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
requires: [build-13]
- test-pytest:
name: 'test-14_pytest'
pg_major: 14
@ -917,12 +891,6 @@ workflows:
image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15]
- test-arbitrary-configs:
name: 'test-13_check-arbitrary-configs'
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
requires: [build-13]
- test-arbitrary-configs:
name: 'test-14_check-arbitrary-configs'
pg_major: 14
@ -935,12 +903,17 @@ workflows:
image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15]
- test-pg-upgrade:
name: 'test-13-14_check-pg-upgrade'
old_pg_major: 13
new_pg_major: 14
image_tag: '<< pipeline.parameters.upgrade_pg_versions >>'
requires: [build-13, build-14]
- test-query-generator:
name: 'test-14_check-query-generator'
pg_major: 14
image_tag: '<< pipeline.parameters.pg14_version >>'
requires: [build-14]
- test-query-generator:
name: 'test-15_check-query-generator'
pg_major: 15
image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15]
- test-pg-upgrade:
name: 'test-14-15_check-pg-upgrade'
@ -950,31 +923,13 @@ workflows:
requires: [build-14, build-15]
- test-citus-upgrade:
name: test-13_check-citus-upgrade
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
requires: [build-13]
name: test-14_check-citus-upgrade
pg_major: 14
image_tag: '<< pipeline.parameters.pg14_version >>'
requires: [build-14]
- upload-coverage:
requires:
- test-13_check-multi
- test-13_check-multi-1
- test-13_check-mx
- test-13_check-vanilla
- test-13_check-isolation
- test-13_check-operations
- test-13_check-follower-cluster
- test-13_check-columnar
- test-13_check-columnar-isolation
- test-13_check-failure
- test-13_check-enterprise
- test-13_check-enterprise-isolation
- test-13_check-enterprise-isolation-logicalrep-1
- test-13_check-enterprise-isolation-logicalrep-2
- test-13_check-enterprise-isolation-logicalrep-3
- test-13_check-enterprise-failure
- test-13_check-split
- test-13_check-arbitrary-configs
- test-14_check-multi
- test-14_check-multi-1
- test-14_check-mx
@ -993,6 +948,7 @@ workflows:
- test-14_check-enterprise-failure
- test-14_check-split
- test-14_check-arbitrary-configs
- test-14_check-query-generator
- test-15_check-multi
- test-15_check-multi-1
- test-15_check-mx
@ -1011,18 +967,18 @@ workflows:
- test-15_check-enterprise-failure
- test-15_check-split
- test-15_check-arbitrary-configs
- test-13-14_check-pg-upgrade
- test-15_check-query-generator
- test-14-15_check-pg-upgrade
- test-13_check-citus-upgrade
- test-14_check-citus-upgrade
- ch_benchmark:
requires: [build-13]
requires: [build-14]
filters:
branches:
only:
- /ch_benchmark\/.*/ # match with ch_benchmark/ prefix
- tpcc_benchmark:
requires: [build-13]
requires: [build-14]
filters:
branches:
only:


@ -4,3 +4,4 @@ extend-ignore = E203
# black will truncate to 88 characters usually, but long string literals it
# might keep. That's fine in most cases unless it gets really excessive.
max-line-length = 150
exclude = .git,__pycache__,vendor,tmp_*


@ -1,3 +1,200 @@
### citus v11.3.0 (May 2, 2023) ###
* Introduces CDC implementation for Citus using logical replication
(#6623, #6810, #6827)
* Adds support for `MERGE` command on co-located distributed tables joined on
distribution column (#6696, #6733)
* Adds the view `citus_stat_tenants` that monitors statistics on tenant usage
(#6725)
* Adds the GUC `citus.max_background_task_executors_per_node` to control number
of background task executors involving a node (#6771)
* Allows parallel shard moves in background rebalancer (#6756)
* Introduces the GUC `citus.metadata_sync_mode`, which adds a nontransactional
mode for metadata sync (#6728, #6889)
* Propagates CREATE/ALTER/DROP PUBLICATION statements for distributed tables
(#6776)
* Adds the GUC `citus.enable_non_colocated_router_query_pushdown` to ensure
generating a consistent distributed plan for the queries that reference
non-colocated distributed tables when set to "false" (#6793)
* Checks if all moves are able to be done via logical replication for rebalancer
(#6754)
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug in shard copy operations (#6721)
* Fixes a bug that prevents enforcing identity column restrictions on worker
nodes (#6738)
* Fixes a bug with `INSERT .. SELECT` queries with identity columns (#6802)
* Fixes an issue that caused some queries with custom aggregates to fail (#6805)
* Fixes an issue when `citus_set_coordinator_host` is called more than once
(#6837)
* Fixes an uninitialized memory access in shard split API (#6845)
* Fixes memory leak and max allocation block errors during metadata syncing
(#6728)
* Fixes memory leak in `undistribute_table` (#6693)
* Fixes memory leak in `alter_distributed_table` (#6726)
* Fixes memory leak in `create_distributed_table` (#6722)
* Fixes memory leak issue with query results that returns single row (#6724)
* Improves rebalancer when shard groups have placement count less than worker
count (#6739)
* Makes sure to stop maintenance daemon when dropping a database even without
Citus extension (#6688)
* Prevents using `alter_distributed_table` and `undistribute_table` UDFs when a
table has identity columns (#6738)
* Prevents using identity columns on data types other than `bigint` on
distributed tables (#6738)
### citus v11.2.1 (April 20, 2023) ###
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug in shard copy operations (#6721)
* Fixes a bug with `INSERT .. SELECT` queries with identity columns (#6802)
* Fixes an uninitialized memory access in shard split API (#6845)
* Fixes compilation for PG13.10 and PG14.7 (#6711)
* Fixes memory leak in `alter_distributed_table` (#6726)
* Fixes memory leak issue with query results that returns single row (#6724)
* Prevents using `alter_distributed_table` and `undistribute_table` UDFs when a
table has identity columns (#6738)
* Prevents using identity columns on data types other than `bigint` on
distributed tables (#6738)
### citus v11.1.6 (April 20, 2023) ###
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug in shard copy operations (#6721)
* Fixes a bug that breaks pg upgrades if the user has a columnar table (#6624)
* Fixes a bug that causes background rebalancer to fail when a reference table
doesn't have a primary key (#6682)
* Fixes a regression in allowed foreign keys on distributed tables (#6550)
* Fixes a use-after-free bug in connection management (#6685)
* Fixes an unexpected foreign table error by disallowing to drop the
`table_name` option (#6669)
* Fixes an uninitialized memory access in shard split API (#6845)
* Fixes compilation for PG13.10 and PG14.7 (#6711)
* Fixes crash that happens when trying to replicate a reference table that is
actually dropped (#6595)
* Fixes memory leak issue with query results that returns single row (#6724)
* Fixes the modifiers for subscription and role creation (#6603)
* Makes sure to quote all identifiers used for logical replication to prevent
potential issues (#6604)
* Makes sure to skip foreign key validations at the end of shard moves (#6640)
### citus v11.0.8 (April 20, 2023) ###
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug that breaks pg upgrades if the user has a columnar table (#6624)
* Fixes an unexpected foreign table error by disallowing to drop the
`table_name` option (#6669)
* Fixes compilation warning on PG13 + OpenSSL 3.0 (#6038, #6502)
* Fixes crash that happens when trying to replicate a reference table that is
actually dropped (#6595)
* Fixes memory leak issue with query results that returns single row (#6724)
* Fixes the modifiers for subscription and role creation (#6603)
* Fixes two potential dangling pointer issues (#6504, #6507)
* Makes sure to quote all identifiers used for logical replication to prevent
potential issues (#6604)
### citus v10.2.9 (April 20, 2023) ###
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug in `ALTER EXTENSION citus UPDATE` (#6383)
* Fixes a bug that breaks pg upgrades if the user has a columnar table (#6624)
* Fixes a bug that prevents retaining columnar table options after a
table-rewrite (#6337)
* Fixes memory leak issue with query results that returns single row (#6724)
* Raises memory limits in columnar from 256MB to 1GB for reads and writes
(#6419)
### citus v10.1.6 (April 20, 2023) ###
* Fixes a crash that occurs when the aggregate that cannot be pushed-down
returns empty result from a worker (#5679)
* Fixes columnar freezing/wraparound bug (#5962)
* Fixes memory leak issue with query results that returns single row (#6724)
* Prevents alter table functions from dropping extensions (#5974)
### citus v10.0.8 (April 20, 2023) ###
* Fixes a bug that could break `DROP SCHEMA/EXTENSION` commands when there is a
columnar table (#5458)
* Fixes a crash that occurs when the aggregate that cannot be pushed-down
returns empty result from a worker (#5679)
* Fixes columnar freezing/wraparound bug (#5962)
* Fixes memory leak issue with query results that returns single row (#6724)
* Prevents alter table functions from dropping extensions (#5974)
### citus v9.5.12 (April 20, 2023) ###
* Fixes a crash that occurs when the aggregate that cannot be pushed-down
returns empty result from a worker (#5679)
* Fixes memory leak issue with query results that returns single row (#6724)
* Prevents alter table functions from dropping extensions (#5974)
### citus v11.2.0 (January 30, 2023) ###
* Adds support for outer joins with reference tables / complex subquery-CTEs


@ -219,6 +219,18 @@ style `#include` statements like this:
Any other SQL you can put directly in the main sql file, e.g.
`src/backend/distributed/sql/citus--8.3-1--9.0-1.sql`.
### Backporting a commit to a release branch
1. Check out the release branch that you want to backport to `git checkout release-11.3`
2. Make sure you have the latest changes `git pull`
3. Create a new release branch with a unique name `git checkout -b release-11.3-<yourname>`
4. Cherry-pick the commit that you want to backport `git cherry-pick -x <sha>` (the `-x` is important)
5. Push the branch `git push`
6. Wait for tests to pass
7. If the cherry-pick required non-trivial merge conflicts, create a PR and ask
for a review.
8. After the tests pass on CI, fast-forward the release branch `git push origin release-11.3-<yourname>:release-11.3`
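
Put together, the steps above look roughly like the following; this is a sketch only, with `release-11.3` and `<sha>` as placeholders for the actual release branch and commit.

```bash
# Backport a commit to the release-11.3 branch (placeholder names).
git checkout release-11.3
git pull                                   # make sure the branch is up to date
git checkout -b release-11.3-$USER         # unique branch name
git cherry-pick -x <sha>                   # -x records the original commit sha
git push -u origin release-11.3-$USER
# After CI passes (and review, if the cherry-pick had non-trivial conflicts):
git push origin release-11.3-$USER:release-11.3
```
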
### Running tests
See [`src/test/regress/README.md`](https://github.com/citusdata/citus/blob/master/src/test/regress/README.md)


@ -1,4 +1,4 @@
| **<br/>The Citus database is 100% open source.<br/><img width=1000/><br/>Learn what's new in the [Citus 11.2 release blog](https://www.citusdata.com/blog/2023/02/08/whats-new-in-citus-11-2-patroni-ha-support/) and the [Citus Updates page](https://www.citusdata.com/updates/).<br/><br/>**|
| **<br/>The Citus database is 100% open source.<br/><img width=1000/><br/>Learn what's new in the [Citus 11.3 release blog](https://www.citusdata.com/blog/2023/05/05/whats-new-in-citus-11-3-multi-tenant-saas/) and the [Citus Updates page](https://www.citusdata.com/updates/).<br/><br/>**|
|---|
<br/>
@ -8,7 +8,7 @@
[![Latest Docs](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://docs.citusdata.com/)
[![Stack Overflow](https://img.shields.io/badge/Stack%20Overflow-%20-545353?logo=Stack%20Overflow)](https://stackoverflow.com/questions/tagged/citus)
[![Slack Status](https://citus-slack.herokuapp.com/badge.svg)](https://citus-public.slack.com/)
[Slack](https://citus-public.slack.com/)
[![Code Coverage](https://codecov.io/gh/citusdata/citus/branch/master/graph/badge.svg)](https://app.codecov.io/gh/citusdata/citus)
[![Twitter](https://img.shields.io/twitter/follow/citusdata.svg?label=Follow%20@citusdata)](https://twitter.com/intent/follow?screen_name=citusdata)
@ -94,14 +94,14 @@ Install packages on Ubuntu / Debian:
```bash
curl https://install.citusdata.com/community/deb.sh > add-citus-repo.sh
sudo bash add-citus-repo.sh
sudo apt-get -y install postgresql-15-citus-11.2
sudo apt-get -y install postgresql-15-citus-11.3
```
Install packages on CentOS / Red Hat:
```bash
curl https://install.citusdata.com/community/rpm.sh > add-citus-repo.sh
sudo bash add-citus-repo.sh
sudo yum install -y citus112_15
sudo yum install -y citus113_15
```
To add Citus to your local PostgreSQL database, add the following to `postgresql.conf`:
@ -349,7 +349,7 @@ To learn more about columnar storage, check out the [columnar storage README](ht
## Setting up with High Availability
One of the most popular high availability solutions for PostgreSQL, [Patroni 3.0](https://github.com/zalando/patroni), has [first class support for Citus 10.0 and above](https://patroni.readthedocs.io/en/latest/citus.html#citus), additionally Citus 11.2 ships with improvements for smoother node switchover in Patroni.
One of the most popular high availability solutions for PostgreSQL, [Patroni 3.0](https://github.com/zalando/patroni), has [first class support for Citus 10.0 and above](https://patroni.readthedocs.io/en/latest/citus.html#citus); additionally, since Citus 11.2, Citus ships with improvements for smoother node switchover in Patroni.
An example of patronictl list output for the Citus cluster:

configure

@ -2588,7 +2588,7 @@ fi
if test "$with_pg_version_check" = no; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: building against PostgreSQL $version_num (skipped compatibility check)" >&5
$as_echo "$as_me: building against PostgreSQL $version_num (skipped compatibility check)" >&6;}
elif test "$version_num" != '13' -a "$version_num" != '14' -a "$version_num" != '15'; then
elif test "$version_num" != '14' -a "$version_num" != '15'; then
as_fn_error $? "Citus is not compatible with the detected PostgreSQL version ${version_num}." "$LINENO" 5
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: building against PostgreSQL $version_num" >&5
@ -6160,3 +6160,4 @@ if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
fi


@ -80,7 +80,7 @@ AC_SUBST(with_pg_version_check)
if test "$with_pg_version_check" = no; then
AC_MSG_NOTICE([building against PostgreSQL $version_num (skipped compatibility check)])
elif test "$version_num" != '13' -a "$version_num" != '14' -a "$version_num" != '15'; then
elif test "$version_num" != '14' -a "$version_num" != '15'; then
AC_MSG_ERROR([Citus is not compatible with the detected PostgreSQL version ${version_num}.])
else
AC_MSG_NOTICE([building against PostgreSQL $version_num])


@ -1,6 +1,6 @@
# Columnar extension
comment = 'Citus Columnar extension'
default_version = '11.2-1'
default_version = '11.3-1'
module_pathname = '$libdir/citus_columnar'
relocatable = false
schema = pg_catalog


@ -198,7 +198,7 @@ columnar_customscan_init()
&EnableColumnarCustomScan,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"columnar.enable_qual_pushdown",
@ -208,7 +208,7 @@ columnar_customscan_init()
&EnableColumnarQualPushdown,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomRealVariable(
"columnar.qual_pushdown_correlation_threshold",
@ -222,7 +222,7 @@ columnar_customscan_init()
0.0,
1.0,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
"columnar.max_custom_scan_paths",
@ -234,7 +234,7 @@ columnar_customscan_init()
1,
1024,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomEnumVariable(
"columnar.planner_debug_level",


@ -159,5 +159,5 @@ MemoryContextTotals(MemoryContext context, MemoryContextCounters *counters)
MemoryContextTotals(child, counters);
}
context->methods->stats_compat(context, NULL, NULL, counters, true);
context->methods->stats(context, NULL, NULL, counters, true);
}


@ -1623,12 +1623,8 @@ StartModifyRelation(Relation rel)
{
EState *estate = create_estate_for_relation(rel);
#if PG_VERSION_NUM >= PG_VERSION_14
ResultRelInfo *resultRelInfo = makeNode(ResultRelInfo);
InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0);
#else
ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
#endif
/* ExecSimpleRelationInsert, ... require caller to open indexes */
ExecOpenIndices(resultRelInfo, false);
@ -1658,7 +1654,7 @@ InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls)
ExecStoreHeapTuple(tuple, slot, false);
/* use ExecSimpleRelationInsert to enforce constraints */
ExecSimpleRelationInsert_compat(state->resultRelInfo, state->estate, slot);
ExecSimpleRelationInsert(state->resultRelInfo, state->estate, slot);
}
@ -1689,12 +1685,8 @@ FinishModifyRelation(ModifyState *state)
ExecCloseIndices(state->resultRelInfo);
AfterTriggerEndQuery(state->estate);
#if PG_VERSION_NUM >= PG_VERSION_14
ExecCloseResultRelations(state->estate);
ExecCloseRangeTableRelations(state->estate);
#else
ExecCleanUpTriggerState(state->estate);
#endif
ExecResetTupleTable(state->estate->es_tupleTable, false);
FreeExecutorState(state->estate);
@ -1723,15 +1715,6 @@ create_estate_for_relation(Relation rel)
rte->rellockmode = AccessShareLock;
ExecInitRangeTable(estate, list_make1(rte));
#if PG_VERSION_NUM < PG_VERSION_14
ResultRelInfo *resultRelInfo = makeNode(ResultRelInfo);
InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0);
estate->es_result_relations = resultRelInfo;
estate->es_num_result_relations = 1;
estate->es_result_relation_info = resultRelInfo;
#endif
estate->es_output_cid = GetCurrentCommandId(true);
/* Prepare to catch AFTER triggers. */


@ -1557,7 +1557,7 @@ DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCou
datumTypeLength);
currentDatumDataOffset = att_addlength_datum(currentDatumDataOffset,
datumTypeLength,
currentDatumDataPointer);
datumArray[datumIndex]);
currentDatumDataOffset = att_align_nominal(currentDatumDataOffset,
datumTypeAlign);


@ -115,9 +115,7 @@ static RangeVar * ColumnarProcessAlterTable(AlterTableStmt *alterTableStmt,
List **columnarOptions);
static void ColumnarProcessUtility(PlannedStmt *pstmt,
const char *queryString,
#if PG_VERSION_NUM >= PG_VERSION_14
bool readOnlyTree,
#endif
ProcessUtilityContext context,
ParamListInfo params,
struct QueryEnvironment *queryEnv,
@ -665,7 +663,6 @@ columnar_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
}
#if PG_VERSION_NUM >= PG_VERSION_14
static TransactionId
columnar_index_delete_tuples(Relation rel,
TM_IndexDeleteOp *delstate)
@ -714,19 +711,6 @@ columnar_index_delete_tuples(Relation rel,
}
#else
static TransactionId
columnar_compute_xid_horizon_for_tuples(Relation rel,
ItemPointerData *tids,
int nitems)
{
elog(ERROR, "columnar_compute_xid_horizon_for_tuples not implemented");
}
#endif
static void
columnar_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
int options, BulkInsertState bistate)
@ -1484,8 +1468,7 @@ columnar_index_build_range_scan(Relation columnarRelation,
if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
{
/* ignore lazy VACUUM's */
OldestXmin = GetOldestNonRemovableTransactionId_compat(columnarRelation,
PROCARRAY_FLAGS_VACUUM);
OldestXmin = GetOldestNonRemovableTransactionId(columnarRelation);
}
Snapshot snapshot = { 0 };
@ -1813,7 +1796,7 @@ ColumnarReadMissingRowsIntoIndex(TableScanDesc scan, Relation indexRelation,
Relation columnarRelation = scan->rs_rd;
IndexUniqueCheck indexUniqueCheck =
indexInfo->ii_Unique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO;
index_insert_compat(indexRelation, indexValues, indexNulls, columnarItemPointer,
index_insert(indexRelation, indexValues, indexNulls, columnarItemPointer,
columnarRelation, indexUniqueCheck, false, indexInfo);
validateIndexState->tups_inserted += 1;
@ -2018,7 +2001,7 @@ columnar_tableam_init()
&EnableVersionChecksColumnar,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
}
@ -2240,21 +2223,17 @@ ColumnarProcessAlterTable(AlterTableStmt *alterTableStmt, List **columnarOptions
static void
ColumnarProcessUtility(PlannedStmt *pstmt,
const char *queryString,
#if PG_VERSION_NUM >= PG_VERSION_14
bool readOnlyTree,
#endif
ProcessUtilityContext context,
ParamListInfo params,
struct QueryEnvironment *queryEnv,
DestReceiver *dest,
QueryCompletion *completionTag)
{
#if PG_VERSION_NUM >= PG_VERSION_14
if (readOnlyTree)
{
pstmt = copyObject(pstmt);
}
#endif
Node *parsetree = pstmt->utilityStmt;
@ -2371,7 +2350,7 @@ ColumnarProcessUtility(PlannedStmt *pstmt,
CheckCitusColumnarAlterExtensionStmt(parsetree);
}
PrevProcessUtilityHook_compat(pstmt, queryString, false, context,
PrevProcessUtilityHook(pstmt, queryString, false, context,
params, queryEnv, dest, completionTag);
if (columnarOptions != NIL)
@ -2500,11 +2479,7 @@ static const TableAmRoutine columnar_am_methods = {
.tuple_get_latest_tid = columnar_get_latest_tid,
.tuple_tid_valid = columnar_tuple_tid_valid,
.tuple_satisfies_snapshot = columnar_tuple_satisfies_snapshot,
#if PG_VERSION_NUM >= PG_VERSION_14
.index_delete_tuples = columnar_index_delete_tuples,
#else
.compute_xid_horizon_for_tuples = columnar_compute_xid_horizon_for_tuples,
#endif
.tuple_insert = columnar_tuple_insert,
.tuple_insert_speculative = columnar_tuple_insert_speculative,


@ -0,0 +1 @@
-- citus_columnar--11.2-1--11.3-1


@ -0,0 +1 @@
-- citus_columnar--11.3-1--11.2-1


@ -203,8 +203,7 @@ AddShardIdToHashTable(uint64 shardId, ShardIdHashEntry *entry)
{
entry->shardId = shardId;
entry->distributedTableId = CdcLookupShardRelationFromCatalog(shardId, true);
entry->isReferenceTable = CdcPartitionMethodViaCatalog(entry->distributedTableId) ==
'n';
entry->isReferenceTable = CdcIsReferenceTableViaCatalog(entry->distributedTableId);
return entry->distributedTableId;
}
@ -361,12 +360,14 @@ GetTupleForTargetSchemaForCdc(HeapTuple sourceRelationTuple,
targetNulls[targetIndex] = true;
targetIndex++;
}
/* If this source attribute has been dropped, just skip this source attribute.*/
else if (TupleDescAttr(sourceRelDesc, sourceIndex)->attisdropped)
{
sourceIndex++;
continue;
}
/* If both source and target attributes are not dropped, add the attribute field to targetValues. */
else if (sourceIndex < sourceRelDesc->natts)
{


@ -331,16 +331,16 @@ CdcPgDistPartitionTupleViaCatalog(Oid relationId)
/*
* CdcPartitionMethodViaCatalog gets a relationId and returns the partition
* method column from pg_dist_partition via reading from catalog.
* CdcIsReferenceTableViaCatalog gets a relationId and returns true if the relation
* is a reference table and false otherwise.
*/
char
CdcPartitionMethodViaCatalog(Oid relationId)
CdcIsReferenceTableViaCatalog(Oid relationId)
{
HeapTuple partitionTuple = CdcPgDistPartitionTupleViaCatalog(relationId);
if (!HeapTupleIsValid(partitionTuple))
{
return DISTRIBUTE_BY_INVALID;
return false;
}
Datum datumArray[Natts_pg_dist_partition];
@ -351,21 +351,32 @@ CdcPartitionMethodViaCatalog(Oid relationId)
TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);
if (isNullArray[Anum_pg_dist_partition_partmethod - 1])
if (isNullArray[Anum_pg_dist_partition_partmethod - 1] ||
isNullArray[Anum_pg_dist_partition_repmodel - 1])
{
/* partition method cannot be NULL, still let's make sure */
/*
* partition method and replication model cannot be NULL,
* still let's make sure
*/
heap_freetuple(partitionTuple);
table_close(pgDistPartition, NoLock);
return DISTRIBUTE_BY_INVALID;
return false;
}
Datum partitionMethodDatum = datumArray[Anum_pg_dist_partition_partmethod - 1];
char partitionMethodChar = DatumGetChar(partitionMethodDatum);
Datum replicationModelDatum = datumArray[Anum_pg_dist_partition_repmodel - 1];
char replicationModelChar = DatumGetChar(replicationModelDatum);
heap_freetuple(partitionTuple);
table_close(pgDistPartition, NoLock);
return partitionMethodChar;
/*
* A table is a reference table when its partition method is 'none'
* and replication model is 'two phase commit'
*/
return partitionMethodChar == 'n' && replicationModelChar == 't';
}
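
The same check can be expressed as a plain catalog query, which may help when reading the C path above. This is a hypothetical illustration, not part of the change; `'n'` is the 'none' partition method and `'t'` the two-phase-commit replication model, matching the comment above.

```bash
# Hypothetical sketch: list which relations pg_dist_partition marks as
# reference tables, using the same partmethod/repmodel test as the C code.
psql -c "SELECT logicalrelid::regclass,
                (partmethod = 'n' AND repmodel = 't') AS is_reference_table
         FROM pg_dist_partition;"
```
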


@ -25,7 +25,7 @@ uint64 CdcExtractShardIdFromTableName(const char *tableName, bool missingOk);
Oid CdcLookupShardRelationFromCatalog(int64 shardId, bool missingOk);
char CdcPartitionMethodViaCatalog(Oid relationId);
char CdcIsReferenceTableViaCatalog(Oid relationId);
bool CdcCitusHasBeenLoaded(void);


@ -196,6 +196,7 @@ static void EnsureTableNotReferencing(Oid relationId, char conversionType);
static void EnsureTableNotReferenced(Oid relationId, char conversionType);
static void EnsureTableNotForeign(Oid relationId);
static void EnsureTableNotPartition(Oid relationId);
static void ErrorIfColocateWithTenantTable(char *colocateWith);
static TableConversionState * CreateTableConversion(TableConversionParameters *params);
static void CreateDistributedTableLike(TableConversionState *con);
static void CreateCitusTableLike(TableConversionState *con);
@ -247,7 +248,8 @@ undistribute_table(PG_FUNCTION_ARGS)
TableConversionParameters params = {
.relationId = relationId,
.cascadeViaForeignKeys = cascadeViaForeignKeys
.cascadeViaForeignKeys = cascadeViaForeignKeys,
.bypassTenantCheck = false
};
UndistributeTable(&params);
@ -360,6 +362,124 @@ worker_change_sequence_dependency(PG_FUNCTION_ARGS)
}
/*
* DropFKeysAndUndistributeTable drops all foreign keys that relation with
* relationId is involved then undistributes it.
* Note that as UndistributeTable changes relationId of relation, this
* function also returns new relationId of relation.
* Also note that callers are responsible for storing & recreating foreign
* keys to be dropped if needed.
*/
Oid
DropFKeysAndUndistributeTable(Oid relationId)
{
DropFKeysRelationInvolvedWithTableType(relationId, INCLUDE_ALL_TABLE_TYPES);
/* store them before calling UndistributeTable as it changes relationId */
char *relationName = get_rel_name(relationId);
Oid schemaId = get_rel_namespace(relationId);
/* suppress notices messages not to be too verbose */
TableConversionParameters params = {
.relationId = relationId,
.cascadeViaForeignKeys = false,
.suppressNoticeMessages = true
};
UndistributeTable(&params);
Oid newRelationId = get_relname_relid(relationName, schemaId);
/*
* We don't expect this to happen but to be on the safe side let's error
* out here.
*/
EnsureRelationExists(newRelationId);
return newRelationId;
}
/*
* UndistributeTables undistributes given relations. It first collects all foreign keys
* to recreate them after the undistribution. Then, drops the foreign keys and
* undistributes the relations. Finally, it recreates foreign keys.
*/
void
UndistributeTables(List *relationIdList)
{
/*
* Collect foreign keys for recreation and then drop fkeys and undistribute
* tables.
*/
List *originalForeignKeyRecreationCommands = NIL;
Oid relationId = InvalidOid;
foreach_oid(relationId, relationIdList)
{
List *fkeyCommandsForRelation =
GetFKeyCreationCommandsRelationInvolvedWithTableType(relationId,
INCLUDE_ALL_TABLE_TYPES);
originalForeignKeyRecreationCommands = list_concat(
originalForeignKeyRecreationCommands, fkeyCommandsForRelation);
DropFKeysAndUndistributeTable(relationId);
}
/* We can skip foreign key validations as we are sure about them at start */
bool skip_validation = true;
ExecuteForeignKeyCreateCommandList(originalForeignKeyRecreationCommands,
skip_validation);
}
/*
* EnsureUndistributeTenantTableSafe ensures that it is safe to undistribute a tenant table.
*/
void
EnsureUndistributeTenantTableSafe(Oid relationId, const char *operationName)
{
Oid schemaId = get_rel_namespace(relationId);
Assert(IsTenantSchema(schemaId));
/* We only allow undistribute while altering schema */
if (strcmp(operationName, TenantOperationNames[TENANT_SET_SCHEMA]) != 0)
{
ErrorIfTenantTable(relationId, operationName);
}
char *tableName = get_rel_name(relationId);
char *schemaName = get_namespace_name(schemaId);
/*
* Partition table cannot be undistributed. Otherwise, its parent table would still
* be a tenant table whereas partition table would be a local table.
*/
if (PartitionTable(relationId))
{
ereport(ERROR, (errmsg("%s is not allowed for partition table %s in distributed "
"schema %s", operationName, tableName, schemaName),
errdetail("partition table should be under the same distributed "
"schema as its parent and be a "
"distributed schema table.")));
}
/*
* When table is referenced by or referencing to a table in the same tenant
* schema, we should disallow undistributing the table since we do not allow
* foreign keys from/to Citus local or Postgres local table to/from distributed
* schema.
*/
List *fkeyCommandsWithSingleShardTables =
GetFKeyCreationCommandsRelationInvolvedWithTableType(
relationId, INCLUDE_SINGLE_SHARD_TABLES);
if (fkeyCommandsWithSingleShardTables != NIL)
{
ereport(ERROR, (errmsg("%s is not allowed for table %s in distributed schema %s",
operationName, tableName, schemaName),
errdetail("distributed schemas cannot have foreign keys from/to "
"local tables or different schema")));
}
}
/*
* UndistributeTable undistributes the given table. It uses ConvertTable function to
* create a new local table and move everything to that table.
@ -380,6 +500,14 @@ UndistributeTable(TableConversionParameters *params)
"because the table is not distributed")));
}
Oid schemaId = get_rel_namespace(params->relationId);
if (!params->bypassTenantCheck && IsTenantSchema(schemaId) &&
IsCitusTableType(params->relationId, SINGLE_SHARD_DISTRIBUTED))
{
EnsureUndistributeTenantTableSafe(params->relationId,
TenantOperationNames[TENANT_UNDISTRIBUTE_TABLE]);
}
if (!params->cascadeViaForeignKeys)
{
EnsureTableNotReferencing(params->relationId, UNDISTRIBUTE_TABLE);
@ -435,6 +563,9 @@ AlterDistributedTable(TableConversionParameters *params)
"is not distributed")));
}
ErrorIfTenantTable(params->relationId, TenantOperationNames[TENANT_ALTER_TABLE]);
ErrorIfColocateWithTenantTable(params->colocateWith);
EnsureTableNotForeign(params->relationId);
EnsureTableNotPartition(params->relationId);
EnsureHashDistributedTable(params->relationId);
@ -477,8 +608,11 @@ AlterTableSetAccessMethod(TableConversionParameters *params)
EnsureTableNotReferencing(params->relationId, ALTER_TABLE_SET_ACCESS_METHOD);
EnsureTableNotReferenced(params->relationId, ALTER_TABLE_SET_ACCESS_METHOD);
EnsureTableNotForeign(params->relationId);
if (IsCitusTableType(params->relationId, DISTRIBUTED_TABLE))
if (!IsCitusTableType(params->relationId, SINGLE_SHARD_DISTRIBUTED) &&
IsCitusTableType(params->relationId, DISTRIBUTED_TABLE))
{
/* we do not support non-hash distributed tables, except single shard tables */
EnsureHashDistributedTable(params->relationId);
}
@ -1177,6 +1311,25 @@ EnsureTableNotPartition(Oid relationId)
}
/*
* ErrorIfColocateWithTenantTable errors out if given colocateWith text refers to
* a tenant table.
*/
void
ErrorIfColocateWithTenantTable(char *colocateWith)
{
if (colocateWith != NULL &&
!IsColocateWithDefault(colocateWith) &&
!IsColocateWithNone(colocateWith))
{
text *colocateWithTableNameText = cstring_to_text(colocateWith);
Oid colocateWithTableId = ResolveRelationId(colocateWithTableNameText, false);
ErrorIfTenantTable(colocateWithTableId,
TenantOperationNames[TENANT_COLOCATE_WITH]);
}
}
TableConversionState *
CreateTableConversion(TableConversionParameters *params)
{
@ -1364,9 +1517,21 @@ void
CreateCitusTableLike(TableConversionState *con)
{
if (IsCitusTableType(con->relationId, DISTRIBUTED_TABLE))
{
if (IsCitusTableType(con->relationId, SINGLE_SHARD_DISTRIBUTED))
{
ColocationParam colocationParam = {
.colocationParamType = COLOCATE_WITH_TABLE_LIKE_OPT,
.colocateWithTableName = quote_qualified_identifier(con->schemaName,
con->relationName)
};
CreateSingleShardTable(con->newRelationId, colocationParam);
}
else
{
CreateDistributedTableLike(con);
}
}
else if (IsCitusTableType(con->relationId, REFERENCE_TABLE))
{
CreateReferenceTable(con->newRelationId);
@ -1710,20 +1875,13 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands,
}
else if (ShouldSyncTableMetadata(sourceId))
{
char *qualifiedTableName = quote_qualified_identifier(schemaName, sourceName);
/*
* We are converting a citus local table to a distributed/reference table,
* so we should prevent dropping the sequence on the table. Otherwise, we'd
* lose track of the previous changes in the sequence.
*/
StringInfo command = makeStringInfo();
appendStringInfo(command,
"SELECT pg_catalog.worker_drop_sequence_dependency(%s);",
quote_literal_cstr(qualifiedTableName));
SendCommandToWorkersWithMetadata(command->data);
char *command = WorkerDropSequenceDependencyCommand(sourceId);
SendCommandToWorkersWithMetadata(command);
}
}
@ -1863,6 +2021,12 @@ CheckAlterDistributedTableConversionParameters(TableConversionState *con)
"it is not a distributed table",
con->colocateWith)));
}
else if (IsCitusTableType(colocateWithTableOid, SINGLE_SHARD_DISTRIBUTED))
{
ereport(ERROR, (errmsg("cannot colocate with %s because "
"it is a single shard distributed table",
con->colocateWith)));
}
}
/* shard_count:=0 is not allowed */


@ -468,7 +468,8 @@ ExecuteCascadeOperationForRelationIdList(List *relationIdList,
{
TableConversionParameters params = {
.relationId = relationId,
.cascadeViaForeignKeys = cascadeViaForeignKeys
.cascadeViaForeignKeys = cascadeViaForeignKeys,
.bypassTenantCheck = false
};
UndistributeTable(&params);
}


@ -18,6 +18,7 @@
*/
#include "postgres.h"
#include "miscadmin.h"
#include "access/genam.h"
#include "access/htup_details.h"
@ -54,7 +55,7 @@
* This is used after every CREATE TABLE statement in utility_hook.c
* If this variable is set to true, we add all created tables to metadata.
*/
bool AddAllLocalTablesToMetadata = true;
bool AddAllLocalTablesToMetadata = false;
static void citus_add_local_table_to_metadata_internal(Oid relationId,
bool cascadeViaForeignKeys);
@ -1500,3 +1501,38 @@ FinalizeCitusLocalTableCreation(Oid relationId)
InvalidateForeignKeyGraph();
}
}
/*
* ShouldAddNewTableToMetadata takes a relationId and returns true if we need to add a
* newly created table to metadata, false otherwise.
* For partitions and temporary tables, ShouldAddNewTableToMetadata returns false.
* For other tables, it returns true if we are on a coordinator that is added as a
* worker node and, of course, if the GUC use_citus_managed_tables is set to on.
*/
bool
ShouldAddNewTableToMetadata(Oid relationId)
{
if (get_rel_persistence(relationId) == RELPERSISTENCE_TEMP ||
PartitionTableNoLock(relationId))
{
/*
* Shouldn't add table to metadata if it's a temp table, or a partition.
* Creating partitions of a table that is added to metadata is already handled.
*/
return false;
}
if (AddAllLocalTablesToMetadata && !IsBinaryUpgrade &&
IsCoordinator() && CoordinatorAddedAsWorkerNode())
{
/*
* We have verified that the GUC is set to true, and we are not upgrading,
* and we are on the coordinator that is added as worker node.
* So return true here, to add this newly created table to metadata.
*/
return true;
}
return false;
}


@ -81,13 +81,6 @@ CitusSignalBackend(uint64 globalPID, uint64 timeout, int sig)
{
Assert((sig == SIGINT) || (sig == SIGTERM));
#if PG_VERSION_NUM < PG_VERSION_14
if (timeout != 0)
{
elog(ERROR, "timeout parameter is only supported on Postgres 14 or later");
}
#endif
bool missingOk = false;
int nodeId = ExtractNodeIdFromGlobalPID(globalPID, missingOk);
int processId = ExtractProcessIdFromGlobalPID(globalPID);
@ -102,14 +95,9 @@ CitusSignalBackend(uint64 globalPID, uint64 timeout, int sig)
}
else
{
#if PG_VERSION_NUM >= PG_VERSION_14
appendStringInfo(cancelQuery,
"SELECT pg_terminate_backend(%d::integer, %lu::bigint)",
processId, timeout);
#else
appendStringInfo(cancelQuery, "SELECT pg_terminate_backend(%d::integer)",
processId);
#endif
}
int connectionFlags = 0;


@ -114,13 +114,6 @@ PreprocessClusterStmt(Node *node, const char *clusterCommand,
static bool
IsClusterStmtVerbose_compat(ClusterStmt *clusterStmt)
{
#if PG_VERSION_NUM < PG_VERSION_14
if (clusterStmt->options & CLUOPT_VERBOSE)
{
return true;
}
return false;
#else
DefElem *opt = NULL;
foreach_ptr(opt, clusterStmt->params)
{
@ -130,5 +123,4 @@ IsClusterStmtVerbose_compat(ClusterStmt *clusterStmt)
}
}
return false;
#endif
}


@ -109,7 +109,7 @@ CreateCollationDDLInternal(Oid collationId, Oid *collowner, char **quotedCollati
colliculocale = NULL;
}
AssertArg((collcollate && collctype) || colliculocale);
Assert((collcollate && collctype) || colliculocale);
#else
/*


@ -111,8 +111,8 @@ typedef struct
{
int shardCount;
bool shardCountIsStrict;
char *colocateWithTableName;
char *distributionColumnName;
ColocationParam colocationParam;
} DistributedTableParams;
@ -141,6 +141,8 @@ static void CreateCitusTable(Oid relationId, CitusTableType tableType,
DistributedTableParams *distributedTableParams);
static void CreateHashDistributedTableShards(Oid relationId, int shardCount,
Oid colocatedTableId, bool localTableEmpty);
static void CreateSingleShardTableShard(Oid relationId, Oid colocatedTableId,
uint32 colocationId);
static uint32 ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
DistributedTableParams *distributedTableParams,
Var *distributionColumn);
@ -157,10 +159,6 @@ static void EnsureCitusTableCanBeCreated(Oid relationOid);
static void PropagatePrerequisiteObjectsForDistributedTable(Oid relationId);
static void EnsureDistributedSequencesHaveOneType(Oid relationId,
List *seqInfoList);
static List * GetFKeyCreationCommandsRelationInvolvedWithTableType(Oid relationId,
int tableTypeFlag);
static Oid DropFKeysAndUndistributeTable(Oid relationId);
static void DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag);
static void CopyLocalDataIntoShards(Oid relationId);
static List * TupleDescColumnNameList(TupleDesc tupleDescriptor);
@ -216,23 +214,30 @@ create_distributed_table(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2) || PG_ARGISNULL(3))
if (PG_ARGISNULL(0) || PG_ARGISNULL(3))
{
PG_RETURN_VOID();
}
Oid relationId = PG_GETARG_OID(0);
text *distributionColumnText = PG_GETARG_TEXT_P(1);
text *distributionColumnText = PG_ARGISNULL(1) ? NULL : PG_GETARG_TEXT_P(1);
Oid distributionMethodOid = PG_GETARG_OID(2);
text *colocateWithTableNameText = PG_GETARG_TEXT_P(3);
char *colocateWithTableName = text_to_cstring(colocateWithTableNameText);
bool shardCountIsStrict = false;
if (distributionColumnText)
{
if (PG_ARGISNULL(2))
{
PG_RETURN_VOID();
}
int shardCount = ShardCount;
if (!PG_ARGISNULL(4))
{
if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 &&
pg_strncasecmp(colocateWithTableName, "none", NAMEDATALEN) != 0)
if (!IsColocateWithDefault(colocateWithTableName) &&
!IsColocateWithNone(colocateWithTableName))
{
ereport(ERROR, (errmsg("Cannot use colocate_with with a table "
"and shard_count at the same time")));
@ -241,31 +246,12 @@ create_distributed_table(PG_FUNCTION_ARGS)
shardCount = PG_GETARG_INT32(4);
/*
* if shard_count parameter is given than we have to
* make sure table has that many shards
* If shard_count parameter is given, then we have to
* make sure table has that many shards.
*/
shardCountIsStrict = true;
}
EnsureCitusTableCanBeCreated(relationId);
/* enable create_distributed_table on an empty node */
InsertCoordinatorIfClusterEmpty();
/*
* Lock target relation with an exclusive lock - there's no way to make
* sense of this table until we've committed, and we don't want multiple
* backends manipulating this relation.
*/
Relation relation = try_relation_open(relationId, ExclusiveLock);
if (relation == NULL)
{
ereport(ERROR, (errmsg("could not create distributed table: "
"relation does not exist")));
}
relation_close(relation, NoLock);
char *distributionColumnName = text_to_cstring(distributionColumnText);
Assert(distributionColumnName != NULL);
@ -280,6 +266,38 @@ create_distributed_table(PG_FUNCTION_ARGS)
CreateDistributedTable(relationId, distributionColumnName, distributionMethod,
shardCount, shardCountIsStrict, colocateWithTableName);
}
else
{
if (!PG_ARGISNULL(4))
{
ereport(ERROR, (errmsg("shard_count can't be specified when the "
"distribution column is null because in "
"that case it's automatically set to 1")));
}
if (!PG_ARGISNULL(2) &&
LookupDistributionMethod(PG_GETARG_OID(2)) != DISTRIBUTE_BY_HASH)
{
/*
* As we do for shard_count parameter, we could throw an error if
* distribution_type is not NULL when creating a single-shard table.
* However, this requires changing the default value of distribution_type
* parameter to NULL and this would mean a breaking change for most
* users because they're mostly using this API to create sharded
* tables. For this reason, here we instead do nothing if the distribution
* method is DISTRIBUTE_BY_HASH.
*/
ereport(ERROR, (errmsg("distribution_type can't be specified "
"when the distribution column is null ")));
}
ColocationParam colocationParam = {
.colocationParamType = COLOCATE_WITH_TABLE_LIKE_OPT,
.colocateWithTableName = colocateWithTableName,
};
CreateSingleShardTable(relationId, colocationParam);
}
PG_RETURN_VOID();
}
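
With this change, passing a NULL distribution column to `create_distributed_table` takes the new single-shard path instead of erroring out: shard_count must not be given and distribution_type must be NULL or left at its hash default. A hedged usage sketch follows; the exact SQL call shape is assumed from the argument handling above.

```bash
# Assumed usage: create a single-shard distributed table by passing NULL
# as the distribution column; shard_count must not be specified.
psql -c "SELECT create_distributed_table('my_table', NULL);"
```
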
@ -295,11 +313,18 @@ create_distributed_table_concurrently(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2) || PG_ARGISNULL(3))
if (PG_ARGISNULL(0) || PG_ARGISNULL(2) || PG_ARGISNULL(3))
{
PG_RETURN_VOID();
}
if (PG_ARGISNULL(1))
{
ereport(ERROR, (errmsg("cannot use create_distributed_table_concurrently "
"to create a distributed table with a null shard "
"key, consider using create_distributed_table()")));
}
Oid relationId = PG_GETARG_OID(0);
text *distributionColumnText = PG_GETARG_TEXT_P(1);
char *distributionColumnName = text_to_cstring(distributionColumnText);
@ -887,38 +912,6 @@ create_reference_table(PG_FUNCTION_ARGS)
CheckCitusVersion(ERROR);
Oid relationId = PG_GETARG_OID(0);
EnsureCitusTableCanBeCreated(relationId);
/* enable create_reference_table on an empty node */
InsertCoordinatorIfClusterEmpty();
/*
* Lock target relation with an exclusive lock - there's no way to make
* sense of this table until we've committed, and we don't want multiple
* backends manipulating this relation.
*/
Relation relation = try_relation_open(relationId, ExclusiveLock);
if (relation == NULL)
{
ereport(ERROR, (errmsg("could not create reference table: "
"relation does not exist")));
}
relation_close(relation, NoLock);
List *workerNodeList = ActivePrimaryNodeList(ShareLock);
int workerCount = list_length(workerNodeList);
/* if there are no workers, error out */
if (workerCount == 0)
{
char *relationName = get_rel_name(relationId);
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("cannot create reference table \"%s\"", relationName),
errdetail("There are no active worker nodes.")));
}
CreateReferenceTable(relationId);
PG_RETURN_VOID();
}
@ -1013,7 +1006,10 @@ CreateDistributedTable(Oid relationId, char *distributionColumnName,
}
DistributedTableParams distributedTableParams = {
.colocationParam = {
.colocateWithTableName = colocateWithTableName,
.colocationParamType = COLOCATE_WITH_TABLE_LIKE_OPT
},
.shardCount = shardCount,
.shardCountIsStrict = shardCountIsStrict,
.distributionColumnName = distributionColumnName
@ -1033,6 +1029,23 @@ CreateReferenceTable(Oid relationId)
}
/*
* CreateSingleShardTable is a wrapper around CreateCitusTable that creates a
* single shard distributed table that doesn't have a shard key.
*/
void
CreateSingleShardTable(Oid relationId, ColocationParam colocationParam)
{
DistributedTableParams distributedTableParams = {
.colocationParam = colocationParam,
.shardCount = 1,
.shardCountIsStrict = true,
.distributionColumnName = NULL
};
CreateCitusTable(relationId, SINGLE_SHARD_DISTRIBUTED, &distributedTableParams);
}
/*
* CreateCitusTable is the internal method that creates a Citus table in
* given configuration.
@ -1051,13 +1064,36 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
DistributedTableParams *distributedTableParams)
{
if ((tableType == HASH_DISTRIBUTED || tableType == APPEND_DISTRIBUTED ||
tableType == RANGE_DISTRIBUTED) != (distributedTableParams != NULL))
tableType == RANGE_DISTRIBUTED || tableType == SINGLE_SHARD_DISTRIBUTED) !=
(distributedTableParams != NULL))
{
ereport(ERROR, (errmsg("distributed table params must be provided "
"when creating a distributed table and must "
"not be otherwise")));
}
EnsureCitusTableCanBeCreated(relationId);
/* allow creating a Citus table on an empty cluster */
InsertCoordinatorIfClusterEmpty();
Relation relation = try_relation_open(relationId, ExclusiveLock);
if (relation == NULL)
{
ereport(ERROR, (errmsg("could not create Citus table: "
"relation does not exist")));
}
relation_close(relation, NoLock);
if (tableType == SINGLE_SHARD_DISTRIBUTED && ShardReplicationFactor > 1)
{
ereport(ERROR, (errmsg("could not create single shard table: "
"citus.shard_replication_factor is greater than 1"),
errhint("Consider setting citus.shard_replication_factor to 1 "
"and try again")));
}
/*
* EnsureTableNotDistributed errors out when relation is a citus table but
* we don't want to ask user to first undistribute their citus local tables
@ -1115,7 +1151,7 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
PropagatePrerequisiteObjectsForDistributedTable(relationId);
Var *distributionColumn = NULL;
if (distributedTableParams)
if (distributedTableParams && distributedTableParams->distributionColumnName)
{
distributionColumn = BuildDistributionKeyFromColumnName(relationId,
distributedTableParams->
@ -1130,9 +1166,23 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
* ColocationIdForNewTable assumes caller acquires lock on relationId. In our case,
* our caller already acquired lock on relationId.
*/
uint32 colocationId = ColocationIdForNewTable(relationId, tableType,
uint32 colocationId = INVALID_COLOCATION_ID;
if (distributedTableParams &&
distributedTableParams->colocationParam.colocationParamType ==
COLOCATE_WITH_COLOCATION_ID)
{
colocationId = distributedTableParams->colocationParam.colocationId;
}
else
{
/*
* ColocationIdForNewTable assumes caller acquires lock on relationId. In our case,
* our caller already acquired lock on relationId.
*/
colocationId = ColocationIdForNewTable(relationId, tableType,
distributedTableParams,
distributionColumn);
}
EnsureRelationCanBeDistributed(relationId, distributionColumn,
citusTableParams.distributionMethod,
@ -1187,6 +1237,11 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
{
CreateReferenceTableShard(relationId);
}
else if (tableType == SINGLE_SHARD_DISTRIBUTED)
{
CreateSingleShardTableShard(relationId, colocatedTableId,
colocationId);
}
if (ShouldSyncTableMetadata(relationId))
{
@ -1227,7 +1282,10 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
MemoryContextReset(citusPartitionContext);
DistributedTableParams childDistributedTableParams = {
.colocationParam = {
.colocationParamType = COLOCATE_WITH_TABLE_LIKE_OPT,
.colocateWithTableName = parentRelationName,
},
.shardCount = distributedTableParams->shardCount,
.shardCountIsStrict = false,
.distributionColumnName = distributedTableParams->distributionColumnName,
@ -1241,7 +1299,8 @@ CreateCitusTable(Oid relationId, CitusTableType tableType,
}
/* copy over data for hash distributed and reference tables */
if (tableType == HASH_DISTRIBUTED || tableType == REFERENCE_TABLE)
if (tableType == HASH_DISTRIBUTED || tableType == SINGLE_SHARD_DISTRIBUTED ||
tableType == REFERENCE_TABLE)
{
if (RegularTable(relationId))
{
@ -1277,34 +1336,50 @@ DecideCitusTableParams(CitusTableType tableType,
{
case HASH_DISTRIBUTED:
{
Assert(distributedTableParams->colocationParam.colocationParamType ==
COLOCATE_WITH_TABLE_LIKE_OPT);
citusTableParams.distributionMethod = DISTRIBUTE_BY_HASH;
citusTableParams.replicationModel =
DecideDistTableReplicationModel(DISTRIBUTE_BY_HASH,
distributedTableParams->
distributedTableParams->colocationParam.
colocateWithTableName);
break;
}
case APPEND_DISTRIBUTED:
{
Assert(distributedTableParams->colocationParam.colocationParamType ==
COLOCATE_WITH_TABLE_LIKE_OPT);
citusTableParams.distributionMethod = DISTRIBUTE_BY_APPEND;
citusTableParams.replicationModel =
DecideDistTableReplicationModel(APPEND_DISTRIBUTED,
distributedTableParams->
distributedTableParams->colocationParam.
colocateWithTableName);
break;
}
case RANGE_DISTRIBUTED:
{
Assert(distributedTableParams->colocationParam.colocationParamType ==
COLOCATE_WITH_TABLE_LIKE_OPT);
citusTableParams.distributionMethod = DISTRIBUTE_BY_RANGE;
citusTableParams.replicationModel =
DecideDistTableReplicationModel(RANGE_DISTRIBUTED,
distributedTableParams->
distributedTableParams->colocationParam.
colocateWithTableName);
break;
}
case SINGLE_SHARD_DISTRIBUTED:
{
citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE;
citusTableParams.replicationModel = REPLICATION_MODEL_STREAMING;
break;
}
case REFERENCE_TABLE:
{
citusTableParams.distributionMethod = DISTRIBUTE_BY_NONE;
@ -1504,85 +1579,6 @@ EnsureDistributedSequencesHaveOneType(Oid relationId, List *seqInfoList)
}
/*
* GetFKeyCreationCommandsRelationInvolvedWithTableType returns a list of DDL
* commands to recreate the foreign keys that relation with relationId is involved
* with given table type.
*/
static List *
GetFKeyCreationCommandsRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
{
int referencingFKeysFlag = INCLUDE_REFERENCING_CONSTRAINTS |
tableTypeFlag;
List *referencingFKeyCreationCommands =
GetForeignConstraintCommandsInternal(relationId, referencingFKeysFlag);
/* already captured self referencing foreign keys, so use EXCLUDE_SELF_REFERENCES */
int referencedFKeysFlag = INCLUDE_REFERENCED_CONSTRAINTS |
EXCLUDE_SELF_REFERENCES |
tableTypeFlag;
List *referencedFKeyCreationCommands =
GetForeignConstraintCommandsInternal(relationId, referencedFKeysFlag);
return list_concat(referencingFKeyCreationCommands, referencedFKeyCreationCommands);
}
/*
* DropFKeysAndUndistributeTable drops all foreign keys that relation with
* relationId is involved then undistributes it.
* Note that as UndistributeTable changes relationId of relation, this
* function also returns new relationId of relation.
* Also note that callers are responsible for storing & recreating foreign
* keys to be dropped if needed.
*/
static Oid
DropFKeysAndUndistributeTable(Oid relationId)
{
DropFKeysRelationInvolvedWithTableType(relationId, INCLUDE_ALL_TABLE_TYPES);
/* store them before calling UndistributeTable as it changes relationId */
char *relationName = get_rel_name(relationId);
Oid schemaId = get_rel_namespace(relationId);
/* suppress notices messages not to be too verbose */
TableConversionParameters params = {
.relationId = relationId,
.cascadeViaForeignKeys = false,
.suppressNoticeMessages = true
};
UndistributeTable(&params);
Oid newRelationId = get_relname_relid(relationName, schemaId);
/*
* We don't expect this to happen but to be on the safe side let's error
* out here.
*/
EnsureRelationExists(newRelationId);
return newRelationId;
}
/*
* DropFKeysRelationInvolvedWithTableType drops foreign keys that relation
* with relationId is involved with given table type.
*/
static void
DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
{
int referencingFKeysFlag = INCLUDE_REFERENCING_CONSTRAINTS |
tableTypeFlag;
DropRelationForeignKeys(relationId, referencingFKeysFlag);
/* already captured self referencing foreign keys, so use EXCLUDE_SELF_REFERENCES */
int referencedFKeysFlag = INCLUDE_REFERENCED_CONSTRAINTS |
EXCLUDE_SELF_REFERENCES |
tableTypeFlag;
DropRelationForeignKeys(relationId, referencedFKeysFlag);
}
/*
* DecideDistTableReplicationModel function decides which replication model should be
* used for a distributed table depending on given distribution configuration.
@ -1667,6 +1663,41 @@ CreateHashDistributedTableShards(Oid relationId, int shardCount,
}
/*
* CreateSingleShardTableShard creates the shard of the given single-shard
* distributed table.
*/
static void
CreateSingleShardTableShard(Oid relationId, Oid colocatedTableId,
uint32 colocationId)
{
if (colocatedTableId != InvalidOid)
{
/*
* We currently allow concurrent distribution of colocated tables (which
* we probably should not be allowing because of foreign keys /
* partitioning etc).
*
* We also prevent concurrent shard moves / copies / splits while creating
* a colocated table.
*/
AcquirePlacementColocationLock(colocatedTableId, ShareLock,
"colocate distributed table");
/*
* We don't need to force using exclusive connections because we're anyway
* creating a single shard.
*/
bool useExclusiveConnection = false;
CreateColocatedShards(relationId, colocatedTableId, useExclusiveConnection);
}
else
{
CreateSingleShardTableShardWithRoundRobinPolicy(relationId, colocationId);
}
}
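As a rough user-level illustration of this code path (table names are invented): at the time of this change, passing NULL as the distribution column creates a single-shard distributed table, and colocating a second one with it goes through the colocated-shard branch that the placement colocation lock above protects.
SELECT create_distributed_table('tenant_events', NULL);
SELECT create_distributed_table('tenant_users', NULL, colocate_with => 'tenant_events');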
/*
* ColocationIdForNewTable returns a colocation id for given table
* according to given configuration. If there is no such configuration, it
@ -1695,12 +1726,16 @@ ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
if (tableType == APPEND_DISTRIBUTED || tableType == RANGE_DISTRIBUTED)
{
Assert(distributedTableParams->colocationParam.colocationParamType ==
COLOCATE_WITH_TABLE_LIKE_OPT);
char *colocateWithTableName =
distributedTableParams->colocationParam.colocateWithTableName;
if (!IsColocateWithDefault(colocateWithTableName))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot distribute relation"),
errdetail("Currently, colocate_with option is only supported "
"for hash distributed tables.")));
errdetail("Currently, colocate_with option is not supported "
"for append / range distributed tables.")));
}
return colocationId;
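A hedged SQL-level view of the check above, with illustrative relation names: colocate_with is honored for hash-distributed tables, while append- and range-distributed tables reject it.
SELECT create_distributed_table('events', 'id', 'hash', colocate_with => 'other_events');   -- accepted
SELECT create_distributed_table('events', 'id', 'range', colocate_with => 'other_events');  -- ERROR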
@ -1716,13 +1751,19 @@ ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
* can be sure that there will no modifications on the colocation table
* until this transaction is committed.
*/
Assert(citusTableParams.distributionMethod == DISTRIBUTE_BY_HASH);
Oid distributionColumnType =
distributionColumn ? distributionColumn->vartype : InvalidOid;
Oid distributionColumnCollation =
distributionColumn ? get_typcollation(distributionColumnType) : InvalidOid;
Assert(distributedTableParams->colocationParam.colocationParamType ==
COLOCATE_WITH_TABLE_LIKE_OPT);
char *colocateWithTableName =
distributedTableParams->colocationParam.colocateWithTableName;
/* get an advisory lock to serialize concurrent default group creations */
if (IsColocateWithDefault(colocateWithTableName))
{
AcquireColocationDefaultLock();
}
@ -1734,10 +1775,9 @@ ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
distributedTableParams->shardCount,
distributedTableParams->
shardCountIsStrict,
colocateWithTableName);
if (IsColocateWithDefault(colocateWithTableName) &&
(colocationId != INVALID_COLOCATION_ID))
{
/*
@ -1750,7 +1790,7 @@ ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
if (colocationId == INVALID_COLOCATION_ID)
{
if (IsColocateWithDefault(colocateWithTableName))
{
/*
* Generate a new colocation ID and insert a pg_dist_colocation
@ -1761,7 +1801,7 @@ ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
distributionColumnType,
distributionColumnCollation);
}
else if (IsColocateWithNone(colocateWithTableName))
{
/*
* Generate a new colocation ID and insert a pg_dist_colocation
@ -1795,8 +1835,6 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
{
Oid parentRelationId = InvalidOid;
ErrorIfTableHasUnsupportedIdentityColumn(relationId);
EnsureLocalTableEmptyIfNecessary(relationId, distributionMethod);
/* user really wants triggers? */
@ -1908,8 +1946,15 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn,
*/
if (PartitionedTableNoLock(relationId))
{
/*
* Distributing partitioned tables is only supported for hash-distribution
* or single-shard tables.
*/
bool isSingleShardTable =
distributionMethod == DISTRIBUTE_BY_NONE &&
replicationModel == REPLICATION_MODEL_STREAMING &&
colocationId != INVALID_COLOCATION_ID;
if (distributionMethod != DISTRIBUTE_BY_HASH && !isSingleShardTable)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("distributing partitioned tables in only supported "

View File

@ -214,13 +214,7 @@ DeferErrorIfCircularDependencyExists(const ObjectAddress *objectAddress)
dependency->objectId == objectAddress->objectId &&
dependency->objectSubId == objectAddress->objectSubId)
{
char *objectDescription = getObjectDescription(objectAddress, false);
StringInfo detailInfo = makeStringInfo();
appendStringInfo(detailInfo, "\"%s\" circularly depends on itself, resolve "
@ -393,9 +387,17 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
tableDDLCommand));
}
/*
* We need to drop table, if exists, first to make table creation
* idempotent. Before dropping the table, we should also break
* dependencies with sequences since `drop cascade table` would also
* drop depended sequences. This is safe as we still record dependency
* with the sequence during table creation.
*/
commandList = lcons(DropTableIfExistsCommand(relationId),
commandList);
commandList = lcons(WorkerDropSequenceDependencyCommand(relationId),
commandList);
}
return commandList;
@ -521,7 +523,7 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency)
*/
Assert(false);
ereport(ERROR, (errmsg("unsupported object %s for distribution by citus",
getObjectTypeDescription(dependency,
/* missingOk: */ false)),
errdetail(

View File

@ -294,8 +294,8 @@ static DistributeObjectOps Any_CreateForeignServer = {
static DistributeObjectOps Any_CreateSchema = {
.deparse = DeparseCreateSchemaStmt,
.qualify = NULL,
.preprocess = NULL,
.postprocess = PostprocessCreateSchemaStmt,
.operationType = DIST_OPS_CREATE,
.address = CreateSchemaStmtObjectAddress,
.markDistributed = true,
@ -1024,6 +1024,15 @@ static DistributeObjectOps Routine_Rename = {
.address = RenameFunctionStmtObjectAddress,
.markDistributed = false,
};
static DistributeObjectOps Schema_AlterOwner = {
.deparse = DeparseAlterSchemaOwnerStmt,
.qualify = NULL,
.preprocess = PreprocessAlterDistributedObjectStmt,
.operationType = DIST_OPS_ALTER,
.postprocess = NULL,
.address = AlterSchemaOwnerStmtObjectAddress,
.markDistributed = false,
};
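With the Schema_AlterOwner ops registered above, an ownership change on a distributed schema is deparsed and propagated to the workers; a minimal sketch with made-up names:
ALTER SCHEMA sales OWNER TO sales_admin;  -- now also executed on worker nodes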
static DistributeObjectOps Schema_Drop = {
.deparse = DeparseDropSchemaStmt,
.qualify = NULL,
@ -1457,6 +1466,11 @@ GetDistributeObjectOps(Node *node)
return &Routine_AlterOwner;
}
case OBJECT_SCHEMA:
{
return &Schema_AlterOwner;
}
case OBJECT_STATISTIC_EXT:
{
return &Statistics_AlterOwner;
@ -1517,7 +1531,7 @@ GetDistributeObjectOps(Node *node)
case T_AlterTableStmt:
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
switch (stmt->objtype)
{
case OBJECT_TYPE:
{

View File

@ -206,11 +206,7 @@ MakeCollateClauseFromOid(Oid collationOid)
List *objName = NIL;
List *objArgs = NIL;
getObjectIdentityParts(&collateAddress, &objName, &objArgs, false);
char *name = NULL;
foreach_ptr(name, objName)

View File

@ -90,7 +90,27 @@ master_remove_partition_metadata(PG_FUNCTION_ARGS)
DeletePartitionRow(relationId);
/*
* We want to keep using the same colocation group for the tenant even if
* all the tables that belong to it are dropped and new tables are created
* for the tenant etc. For this reason, if a colocation group belongs to a
* tenant schema, we don't delete the colocation group even if there are no
* tables that belong to it.
*
* This does not apply if the system catalog cannot find the schema of the
* table, because that means the whole schema is being dropped.
*
* In that case, we want to delete the colocation group regardless of
* whether the schema is a tenant schema or not. Even more, calling
* IsTenantSchema() with InvalidOid would cause an error, hence we check
* whether the schema is valid beforehand.
*/
bool missingOk = true;
Oid schemaId = get_namespace_oid(schemaName, missingOk);
if (!OidIsValid(schemaId) || !IsTenantSchema(schemaId))
{
DeleteColocationGroupIfNoTablesBelong(colocationId);
}
PG_RETURN_VOID();
}
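In user-facing terms (hypothetical names), this keeps a distributed schema's colocation group alive across table drops, so a recreated tenant table lands in the same group:
DROP TABLE tenant_a.orders;                -- last table in the distributed schema
CREATE TABLE tenant_a.orders (id bigint);  -- joins the same colocation group again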

View File

@ -303,6 +303,11 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
/*
* Foreign keys from citus local tables or reference tables to distributed
* tables are not supported.
*
* We could support foreign keys from reference tables to single-shard
* tables, but that does not seem very useful. If we ever decide to support
* it, we also need to expand the relation access tracking checks to cover
* single-shard tables.
*/
if (referencingIsCitusLocalOrRefTable && !referencedIsCitusLocalOrRefTable)
{
@ -361,7 +366,12 @@ ErrorIfUnsupportedForeignConstraintExists(Relation relation, char referencingDis
* if tables are hash-distributed and colocated, we need to make sure that
* the distribution key is included in foreign constraint.
*/
bool referencedIsSingleShardTable =
IsSingleShardTableByDistParams(referencedDistMethod,
referencedReplicationModel,
referencedColocationId);
if (!referencedIsCitusLocalOrRefTable && !referencedIsSingleShardTable &&
!foreignConstraintOnDistKey)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot create foreign key constraint"),
@ -886,6 +896,48 @@ GetForeignConstraintCommandsInternal(Oid relationId, int flags)
}
/*
* GetFKeyCreationCommandsRelationInvolvedWithTableType returns a list of DDL
* commands to recreate the foreign keys that relation with relationId is involved
* with given table type.
*/
List *
GetFKeyCreationCommandsRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
{
int referencingFKeysFlag = INCLUDE_REFERENCING_CONSTRAINTS |
tableTypeFlag;
List *referencingFKeyCreationCommands =
GetForeignConstraintCommandsInternal(relationId, referencingFKeysFlag);
/* already captured self referencing foreign keys, so use EXCLUDE_SELF_REFERENCES */
int referencedFKeysFlag = INCLUDE_REFERENCED_CONSTRAINTS |
EXCLUDE_SELF_REFERENCES |
tableTypeFlag;
List *referencedFKeyCreationCommands =
GetForeignConstraintCommandsInternal(relationId, referencedFKeysFlag);
return list_concat(referencingFKeyCreationCommands, referencedFKeyCreationCommands);
}
/*
* DropFKeysRelationInvolvedWithTableType drops foreign keys that relation
* with relationId is involved with given table type.
*/
void
DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag)
{
int referencingFKeysFlag = INCLUDE_REFERENCING_CONSTRAINTS |
tableTypeFlag;
DropRelationForeignKeys(relationId, referencingFKeysFlag);
/* already captured self referencing foreign keys, so use EXCLUDE_SELF_REFERENCES */
int referencedFKeysFlag = INCLUDE_REFERENCED_CONSTRAINTS |
EXCLUDE_SELF_REFERENCES |
tableTypeFlag;
DropRelationForeignKeys(relationId, referencedFKeysFlag);
}
/*
* HasForeignKeyWithLocalTable returns true if relation has foreign key
* relationship with a local table.
@ -1304,6 +1356,10 @@ IsTableTypeIncluded(Oid relationId, int flags)
{
return (flags & INCLUDE_LOCAL_TABLES) != 0;
}
else if (IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED))
{
return (flags & INCLUDE_SINGLE_SHARD_TABLES) != 0;
}
else if (IsCitusTableType(relationId, DISTRIBUTED_TABLE))
{
return (flags & INCLUDE_DISTRIBUTED_TABLES) != 0;

View File

@ -105,6 +105,9 @@ static void DistributeFunctionColocatedWithDistributedTable(RegProcedure funcOid
char *colocateWithTableName,
const ObjectAddress *
functionAddress);
static void DistributeFunctionColocatedWithSingleShardTable(const
ObjectAddress *functionAddress,
text *colocateWithText);
static void DistributeFunctionColocatedWithReferenceTable(const
ObjectAddress *functionAddress);
static List * FilterDistributedFunctions(GrantStmt *grantStmt);
@ -133,6 +136,7 @@ create_distributed_function(PG_FUNCTION_ARGS)
Oid distributionArgumentOid = InvalidOid;
bool colocatedWithReferenceTable = false;
bool colocatedWithSingleShardTable = false;
char *distributionArgumentName = NULL;
char *colocateWithTableName = NULL;
@ -187,6 +191,8 @@ create_distributed_function(PG_FUNCTION_ARGS)
Oid colocationRelationId = ResolveRelationId(colocateWithText, false);
colocatedWithReferenceTable = IsCitusTableType(colocationRelationId,
REFERENCE_TABLE);
colocatedWithSingleShardTable = IsCitusTableType(colocationRelationId,
SINGLE_SHARD_DISTRIBUTED);
}
}
@ -276,11 +282,16 @@ create_distributed_function(PG_FUNCTION_ARGS)
forceDelegationAddress,
functionAddress);
}
else if (!colocatedWithReferenceTable && !colocatedWithSingleShardTable)
{
DistributeFunctionColocatedWithDistributedTable(funcOid, colocateWithTableName,
functionAddress);
}
else if (colocatedWithSingleShardTable)
{
DistributeFunctionColocatedWithSingleShardTable(functionAddress,
colocateWithText);
}
else if (colocatedWithReferenceTable)
{
/*
@ -435,6 +446,25 @@ DistributeFunctionColocatedWithDistributedTable(RegProcedure funcOid,
}
/*
* DistributeFunctionColocatedWithSingleShardTable updates pg_dist_object records for
* a function/procedure that is colocated with a single shard table.
*/
static void
DistributeFunctionColocatedWithSingleShardTable(const ObjectAddress *functionAddress,
text *colocateWithText)
{
/* get the single shard table's colocation id */
int colocationId = TableColocationId(ResolveRelationId(colocateWithText, false));
/* set distribution argument to NULL */
int *distributionArgumentIndex = NULL;
UpdateFunctionDistributionInfo(functionAddress, distributionArgumentIndex,
&colocationId,
NULL);
}
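A sketch of the resulting user-facing behavior (function and table names are assumptions): colocating a function with a single-shard table is accepted, but only without a distribution argument, as enforced further below.
SELECT create_distributed_function('refresh_tenant()', colocate_with => 'tenant_events');           -- accepted
SELECT create_distributed_function('refresh_tenant(int)', '$1', colocate_with => 'tenant_events');  -- ERROR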
/*
* DistributeFunctionColocatedWithReferenceTable updates pg_dist_object records for
* a function/procedure that is colocated with a reference table.
@ -641,6 +671,19 @@ EnsureFunctionCanBeColocatedWithTable(Oid functionOid, Oid distributionColumnTyp
CitusTableCacheEntry *sourceTableEntry = GetCitusTableCacheEntry(sourceRelationId);
char sourceReplicationModel = sourceTableEntry->replicationModel;
if (IsCitusTableTypeCacheEntry(sourceTableEntry, SINGLE_SHARD_DISTRIBUTED) &&
distributionColumnType != InvalidOid)
{
char *functionName = get_func_name(functionOid);
char *sourceRelationName = get_rel_name(sourceRelationId);
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot colocate function \"%s\" and table \"%s\" because "
"distribution arguments are not supported when "
"colocating with single shard distributed tables.",
functionName, sourceRelationName)));
}
if (!IsCitusTableTypeCacheEntry(sourceTableEntry, HASH_DISTRIBUTED) &&
!IsCitusTableTypeCacheEntry(sourceTableEntry, REFERENCE_TABLE))
{
@ -1598,7 +1641,7 @@ PreprocessAlterFunctionDependsStmt(Node *node, const char *queryString,
* workers
*/
const char *functionName =
getObjectIdentity(address, /* missingOk: */ false);
ereport(ERROR, (errmsg("distrtibuted functions are not allowed to depend on an "
"extension"),
errdetail("Function \"%s\" is already distributed. Functions from "
@ -1768,7 +1811,7 @@ GenerateBackupNameForProcCollision(const ObjectAddress *address)
List *newProcName = list_make2(namespace, makeString(newName));
/* don't need to rename if the input arguments don't match */
FuncCandidateList clist = FuncnameGetCandidates(newProcName, numargs, NIL,
false, false, false, true);
for (; clist; clist = clist->next)
{

View File

@ -216,7 +216,7 @@ DoLocalCopy(StringInfo buffer, Oid relationId, int64 shardId, CopyStmt *copyStat
ParseState *pState = make_parsestate(NULL);
(void) addRangeTableEntryForRelation(pState, shard, AccessShareLock,
NULL, false, false);
CopyFromState cstate = BeginCopyFrom(pState, shard, NULL, NULL, false,
ReadFromLocalBufferCallback,
copyStatement->attlist,
copyStatement->options);

View File

@ -258,9 +258,6 @@ static CopyCoercionData * ColumnCoercionPaths(TupleDesc destTupleDescriptor,
Oid *finalColumnTypeArray);
static FmgrInfo * TypeOutputFunctions(uint32 columnCount, Oid *typeIdArray,
bool binaryFormat);
#if PG_VERSION_NUM < PG_VERSION_14
static List * CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist);
#endif
static bool CopyStatementHasFormat(CopyStmt *copyStatement, char *formatName);
static void CitusCopyFrom(CopyStmt *copyStatement, QueryCompletion *completionTag);
static void EnsureCopyCanRunOnRelation(Oid relationId);
@ -609,7 +606,7 @@ CopyToExistingShards(CopyStmt *copyStatement, QueryCompletion *completionTag)
}
/* initialize copy state to read from COPY data source */
CopyFromState copyState = BeginCopyFrom(NULL,
copiedDistributedRelation,
NULL,
copyStatement->filename,
@ -648,9 +645,7 @@ CopyToExistingShards(CopyStmt *copyStatement, QueryCompletion *completionTag)
++processedRowCount;
pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, processedRowCount);
}
EndCopyFrom(copyState);
@ -890,28 +885,8 @@ CanUseBinaryCopyFormatForType(Oid typeId)
HeapTuple typeTup = typeidType(typeId);
Form_pg_type type = (Form_pg_type) GETSTRUCT(typeTup);
Oid elementType = type->typelem;
#if PG_VERSION_NUM < PG_VERSION_14
char typeCategory = type->typcategory;
#endif
ReleaseSysCache(typeTup);
#if PG_VERSION_NUM < PG_VERSION_14
/*
* In PG versions before PG14 the array_recv function would error out more
* than necessary.
*
* It errors out when the element type its oids don't match with the oid in
* the received data. This happens pretty much always for non built in
* types, because their oids differ between postgres installations. So we
* skip binary encoding when the element type is a non built in type.
*/
if (typeCategory == TYPCATEGORY_ARRAY && elementType >= FirstNormalObjectId)
{
return false;
}
#endif
/*
* Any type that is a wrapper around an element type (e.g. arrays and
* ranges) require the element type to also has support for binary
@ -1682,20 +1657,6 @@ AppendCopyBinaryFooters(CopyOutState footerOutputState)
static void
SendCopyBegin(CopyOutState cstate)
{
#if PG_VERSION_NUM < PG_VERSION_14
if (PG_PROTOCOL_MAJOR(FrontendProtocol) < 3) {
/* old way */
if (cstate->binary)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY BINARY is not supported to stdout or from stdin")));
pq_putemptymessage('H');
/* grottiness needed for old COPY OUT protocol */
pq_startcopyout();
cstate->copy_dest = COPY_OLD_FE;
return;
}
#endif
StringInfoData buf;
int natts = list_length(cstate->attnumlist);
int16 format = (cstate->binary ? 1 : 0);
@ -1715,16 +1676,6 @@ SendCopyBegin(CopyOutState cstate)
static void
SendCopyEnd(CopyOutState cstate)
{
#if PG_VERSION_NUM < PG_VERSION_14
if (cstate->copy_dest != COPY_NEW_FE)
{
CopySendData(cstate, "\\.", 2);
/* Need to flush out the trailer (this also appends a newline) */
CopySendEndOfRow(cstate, true);
pq_endcopyout(false);
return;
}
#endif
/* Shouldn't have any unsent data */
Assert(cstate->fe_msgbuf->len == 0);
/* Send Copy Done message */
@ -1782,21 +1733,6 @@ CopySendEndOfRow(CopyOutState cstate, bool includeEndOfLine)
switch (cstate->copy_dest)
{
#if PG_VERSION_NUM < PG_VERSION_14
case COPY_OLD_FE:
/* The FE/BE protocol uses \n as newline for all platforms */
if (!cstate->binary && includeEndOfLine)
CopySendChar(cstate, '\n');
if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
{
/* no hope of recovering connection sync, so FATAL */
ereport(FATAL,
(errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("connection lost during COPY to stdout")));
}
break;
#endif
case COPY_FRONTEND:
/* The FE/BE protocol uses \n as newline for all platforms */
if (!cstate->binary && includeEndOfLine)
@ -2128,13 +2064,37 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation,
int columnCount = inputTupleDescriptor->natts;
Oid *finalTypeArray = palloc0(columnCount * sizeof(Oid));
/*
* To ensure the proper co-location and distribution of the target table,
* the entire process of repartitioning intermediate files requires the
* destReceiver to be created on the target rather than the source.
*
* Within this specific code path, it is assumed that the employed model
* is for insert-select. Consequently, it validates the column types of
* destTupleDescriptor(target) during the intermediate result generation
* process. However, this approach varies significantly for MERGE operations,
* where the source tuple(s) can have arbitrary types and are not required to
* align with the target column names.
*
* Despite this minor setback, a significant portion of the code responsible
* for repartitioning intermediate files can be reused for the MERGE
* operation. By leveraging the ability to perform actual coercion during
* the writing process to the target table, we can bypass this specific route.
*/
if (copyDest->skipCoercions)
{
copyDest->columnOutputFunctions =
ColumnOutputFunctions(inputTupleDescriptor, copyOutState->binary);
}
else
{
copyDest->columnCoercionPaths =
ColumnCoercionPaths(destTupleDescriptor, inputTupleDescriptor,
tableId, columnNameList, finalTypeArray);
copyDest->columnOutputFunctions =
TypeOutputFunctions(columnCount, finalTypeArray, copyOutState->binary);
}
}
/* wrap the column names as Values */
foreach(columnNameCell, columnNameList)
@ -2146,6 +2106,7 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation,
}
if (IsCitusTableTypeCacheEntry(cacheEntry, DISTRIBUTED_TABLE) &&
!IsCitusTableTypeCacheEntry(cacheEntry, SINGLE_SHARD_DISTRIBUTED) &&
copyDest->partitionColumnIndex == INVALID_PARTITION_COLUMN_INDEX)
{
ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
@ -2596,10 +2557,12 @@ ShardIdForTuple(CitusCopyDestReceiver *copyDest, Datum *columnValues, bool *colu
/* find the partition column value */
partitionColumnValue = columnValues[partitionColumnIndex];
if (!copyDest->skipCoercions)
{
/* annoyingly this is evaluated twice, but at least we don't crash! */
partitionColumnValue = CoerceColumnValue(partitionColumnValue, coercePath);
}
}
/*
* Find the shard interval and id for the partition column value for
@ -3229,92 +3192,6 @@ CreateRangeTable(Relation rel, AclMode requiredAccess)
}
#if PG_VERSION_NUM < PG_VERSION_14
/* Helper for CheckCopyPermissions(), copied from postgres */
static List *
CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
{
/* *INDENT-OFF* */
List *attnums = NIL;
if (attnamelist == NIL)
{
/* Generate default column list */
int attr_count = tupDesc->natts;
int i;
for (i = 0; i < attr_count; i++)
{
if (TupleDescAttr(tupDesc, i)->attisdropped)
continue;
if (TupleDescAttr(tupDesc, i)->attgenerated)
continue;
attnums = lappend_int(attnums, i + 1);
}
}
else
{
/* Validate the user-supplied list and extract attnums */
ListCell *l;
foreach(l, attnamelist)
{
char *name = strVal(lfirst(l));
int attnum;
int i;
/* Lookup column name */
attnum = InvalidAttrNumber;
for (i = 0; i < tupDesc->natts; i++)
{
Form_pg_attribute att = TupleDescAttr(tupDesc, i);
if (att->attisdropped)
continue;
if (namestrcmp(&(att->attname), name) == 0)
{
if (att->attgenerated)
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("column \"%s\" is a generated column",
name),
errdetail("Generated columns cannot be used in COPY.")));
attnum = att->attnum;
break;
}
}
if (attnum == InvalidAttrNumber)
{
if (rel != NULL)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" of relation \"%s\" does not exist",
name, RelationGetRelationName(rel))));
else
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" does not exist",
name)));
}
/* Check for duplicates */
if (list_member_int(attnums, attnum))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_COLUMN),
errmsg("column \"%s\" specified more than once",
name)));
attnums = lappend_int(attnums, attnum);
}
}
return attnums;
/* *INDENT-ON* */
}
#endif
/*
* CreateConnectionStateHash constructs a hash table which maps from socket
* number to CopyConnectionState, passing the provided MemoryContext to

View File

@ -19,6 +19,7 @@
#include "catalog/namespace.h"
#include "catalog/pg_class.h"
#include "catalog/pg_namespace.h"
#include "distributed/colocation_utils.h"
#include "distributed/commands.h"
#include <distributed/connection_management.h>
#include "distributed/commands/utility_hook.h"
@ -33,6 +34,7 @@
#include "distributed/resource_lock.h"
#include <distributed/remote_commands.h>
#include "distributed/tenant_schema_metadata.h"
#include "distributed/version_compat.h"
#include "nodes/parsenodes.h"
#include "utils/fmgroids.h"
@ -45,16 +47,18 @@ static List * FilterDistributedSchemas(List *schemas);
static bool SchemaHasDistributedTableWithFKey(char *schemaName);
static bool ShouldPropagateCreateSchemaStmt(void);
static List * GetGrantCommandsFromCreateSchemaStmt(Node *node);
static bool CreateSchemaStmtCreatesTable(CreateSchemaStmt *stmt);
/*
* PostprocessCreateSchemaStmt is called after standard process utility has
* executed CREATE SCHEMA ..
*/
List *
PostprocessCreateSchemaStmt(Node *node, const char *queryString)
{
CreateSchemaStmt *createSchemaStmt = castNode(CreateSchemaStmt, node);
if (!ShouldPropagateCreateSchemaStmt())
{
return NIL;
@ -64,6 +68,16 @@ PreprocessCreateSchemaStmt(Node *node, const char *queryString,
EnsureSequentialMode(OBJECT_SCHEMA);
bool missingOk = createSchemaStmt->if_not_exists;
List *schemaAdressList = CreateSchemaStmtObjectAddress(node, missingOk, true);
Assert(list_length(schemaAdressList) == 1);
ObjectAddress *schemaAdress = linitial(schemaAdressList);
Oid schemaId = schemaAdress->objectId;
if (!OidIsValid(schemaId))
{
return NIL;
}
/* to prevent recursion with mx we disable ddl propagation */
List *commands = list_make1(DISABLE_DDL_PROPAGATION);
@ -74,6 +88,37 @@ PreprocessCreateSchemaStmt(Node *node, const char *queryString,
commands = list_concat(commands, GetGrantCommandsFromCreateSchemaStmt(node));
char *schemaName = get_namespace_name(schemaId);
if (ShouldUseSchemaBasedSharding(schemaName))
{
/* for now, we don't allow creating tenant tables when creating the schema itself */
if (CreateSchemaStmtCreatesTable(createSchemaStmt))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot create distributed schema and table in a "
"single statement"),
errhint("SET citus.enable_schema_based_sharding TO off, "
"or create the schema and table in separate "
"commands.")));
}
/*
* Register the tenant schema on the coordinator and save the command
* to register it on the workers.
*/
int shardCount = 1;
int replicationFactor = 1;
Oid distributionColumnType = InvalidOid;
Oid distributionColumnCollation = InvalidOid;
uint32 colocationId = CreateColocationGroup(
shardCount, replicationFactor, distributionColumnType,
distributionColumnCollation);
InsertTenantSchemaLocally(schemaId, colocationId);
commands = lappend(commands, TenantSchemaInsertCommand(schemaId, colocationId));
}
commands = lappend(commands, ENABLE_DDL_PROPAGATION);
return NodeDDLTaskList(NON_COORDINATOR_NODES, commands);
@ -214,6 +259,20 @@ CreateSchemaStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess)
}
/*
* AlterSchemaOwnerStmtObjectAddress returns the ObjectAddress of the schema that is
* the object of the AlterOwnerStmt. Errors if missing_ok is false.
*/
List *
AlterSchemaOwnerStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess)
{
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
Assert(stmt->objectType == OBJECT_SCHEMA);
return GetObjectAddressBySchemaName(strVal(stmt->object), missing_ok);
}
/*
* AlterSchemaRenameStmtObjectAddress returns the ObjectAddress of the schema that is
* the object of the RenameStmt. Errors if missing_ok is false.
@ -402,3 +461,27 @@ GetGrantCommandsFromCreateSchemaStmt(Node *node)
return commands;
}
/*
* CreateSchemaStmtCreatesTable returns true if given CreateSchemaStmt
* creates a table using "schema_element" list.
*/
static bool
CreateSchemaStmtCreatesTable(CreateSchemaStmt *stmt)
{
Node *element = NULL;
foreach_ptr(element, stmt->schemaElts)
{
/*
* CREATE TABLE AS and CREATE FOREIGN TABLE commands cannot be
* used as schema_elements anyway, so we don't need to check them.
*/
if (IsA(element, CreateStmt))
{
return true;
}
}
return false;
}

View File

@ -0,0 +1,774 @@
/*-------------------------------------------------------------------------
* schema_based_sharding.c
*
* Routines for schema-based sharding.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "access/genam.h"
#include "catalog/catalog.h"
#include "catalog/pg_namespace_d.h"
#include "commands/extension.h"
#include "distributed/argutils.h"
#include "distributed/backend_data.h"
#include "distributed/colocation_utils.h"
#include "distributed/commands.h"
#include "distributed/listutils.h"
#include "distributed/metadata_sync.h"
#include "distributed/metadata/distobject.h"
#include "distributed/multi_partitioning_utils.h"
#include "distributed/tenant_schema_metadata.h"
#include "distributed/worker_shard_visibility.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
static void UnregisterTenantSchemaGlobally(Oid schemaId, char *schemaName);
static List * SchemaGetNonShardTableIdList(Oid schemaId);
static void EnsureSchemaCanBeDistributed(Oid schemaId, List *schemaTableIdList);
static void EnsureTenantSchemaNameAllowed(Oid schemaId);
static void EnsureTableKindSupportedForTenantSchema(Oid relationId);
static void EnsureFKeysForTenantTable(Oid relationId);
static void EnsureSchemaExist(Oid schemaId);
/* controlled via citus.enable_schema_based_sharding GUC */
bool EnableSchemaBasedSharding = false;
const char *TenantOperationNames[TOTAL_TENANT_OPERATION] = {
"undistribute_table",
"alter_distributed_table",
"colocate_with",
"update_distributed_table_colocation",
"set schema",
};
PG_FUNCTION_INFO_V1(citus_internal_unregister_tenant_schema_globally);
PG_FUNCTION_INFO_V1(citus_schema_distribute);
PG_FUNCTION_INFO_V1(citus_schema_undistribute);
/*
* ShouldUseSchemaBasedSharding returns true if the schema with the given name
* should be used as a tenant schema.
*/
bool
ShouldUseSchemaBasedSharding(char *schemaName)
{
if (!EnableSchemaBasedSharding)
{
return false;
}
if (IsBinaryUpgrade)
{
return false;
}
/*
* Citus utility hook skips processing CREATE SCHEMA commands while an
* extension is being created. For this reason, we don't expect to get
* here while an extension is being created.
*/
Assert(!creating_extension);
/*
* CREATE SCHEMA commands issued by internal backends are not meant to
* create tenant schemas but to sync metadata.
*
* On workers, Citus utility hook skips processing CREATE SCHEMA commands
* because we temporarily disable DDL propagation on workers when sending
* CREATE SCHEMA commands. For this reason, right now this check is a bit
* redundant but we prefer to keep it here to be on the safe side.
*/
if (IsCitusInternalBackend() || IsRebalancerInternalBackend())
{
return false;
}
/*
* We deliberately don't do an oid comparison based on PG_PUBLIC_NAMESPACE
* because we want to treat the "public" schema the same way even if it's
* dropped and recreated.
*/
if (strcmp(schemaName, "public") == 0)
{
return false;
}
return true;
}
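Roughly, the checks above translate to the following session behavior (schema names invented; the drop/recreate of public is illustrative only):
SET citus.enable_schema_based_sharding TO on;
CREATE SCHEMA tenant_a;                     -- becomes a distributed (tenant) schema
DROP SCHEMA public; CREATE SCHEMA public;   -- a recreated "public" is still not a tenant schema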
/*
* ShouldCreateTenantSchemaTable returns true if we should create a tenant
* schema table for given relationId.
*/
bool
ShouldCreateTenantSchemaTable(Oid relationId)
{
if (IsBinaryUpgrade)
{
return false;
}
/*
* CREATE TABLE commands issued by internal backends are not meant to
* create tenant tables but to sync metadata.
*/
if (IsCitusInternalBackend() || IsRebalancerInternalBackend())
{
return false;
}
Oid schemaId = get_rel_namespace(relationId);
return IsTenantSchema(schemaId);
}
/*
* EnsureTableKindSupportedForTenantSchema ensures that given table's kind is
* supported by a tenant schema.
*/
static void
EnsureTableKindSupportedForTenantSchema(Oid relationId)
{
if (IsForeignTable(relationId))
{
ereport(ERROR, (errmsg("cannot create a foreign table in a distributed "
"schema")));
}
if (PartitionTable(relationId))
{
ErrorIfIllegalPartitioningInTenantSchema(PartitionParentOid(relationId),
relationId);
}
if (PartitionedTable(relationId))
{
List *partitionList = PartitionList(relationId);
Oid partitionRelationId = InvalidOid;
foreach_oid(partitionRelationId, partitionList)
{
ErrorIfIllegalPartitioningInTenantSchema(relationId, partitionRelationId);
}
}
if (IsChildTable(relationId) || IsParentTable(relationId))
{
ereport(ERROR, (errmsg("tables in a distributed schema cannot inherit or "
"be inherited")));
}
}
/*
* EnsureFKeysForTenantTable ensures that all referencing and referenced foreign
* keys are allowed for given table.
*/
static void
EnsureFKeysForTenantTable(Oid relationId)
{
Oid tenantSchemaId = get_rel_namespace(relationId);
int fKeyReferencingFlags = INCLUDE_REFERENCING_CONSTRAINTS | INCLUDE_ALL_TABLE_TYPES;
List *referencingForeignKeys = GetForeignKeyOids(relationId, fKeyReferencingFlags);
Oid foreignKeyId = InvalidOid;
foreach_oid(foreignKeyId, referencingForeignKeys)
{
Oid referencingTableId = GetReferencingTableId(foreignKeyId);
Oid referencedTableId = GetReferencedTableId(foreignKeyId);
Oid referencedTableSchemaId = get_rel_namespace(referencedTableId);
/* We allow foreign keys to a table in the same schema */
if (tenantSchemaId == referencedTableSchemaId)
{
continue;
}
/*
* Allow foreign keys to the other schema only if the referenced table is
* a reference table.
*/
if (!IsCitusTable(referencedTableId) ||
!IsCitusTableType(referencedTableId, REFERENCE_TABLE))
{
ereport(ERROR, (errmsg("foreign keys from distributed schemas can only "
"point to the same distributed schema or reference "
"tables in regular schemas"),
errdetail("\"%s\" references \"%s\" via foreign key "
"constraint \"%s\"",
generate_qualified_relation_name(
referencingTableId),
generate_qualified_relation_name(referencedTableId),
get_constraint_name(foreignKeyId))));
}
}
int fKeyReferencedFlags = INCLUDE_REFERENCED_CONSTRAINTS | INCLUDE_ALL_TABLE_TYPES;
List *referencedForeignKeys = GetForeignKeyOids(relationId, fKeyReferencedFlags);
foreach_oid(foreignKeyId, referencedForeignKeys)
{
Oid referencingTableId = GetReferencingTableId(foreignKeyId);
Oid referencedTableId = GetReferencedTableId(foreignKeyId);
Oid referencingTableSchemaId = get_rel_namespace(referencingTableId);
/* We allow foreign keys from a table in the same schema */
if (tenantSchemaId == referencingTableSchemaId)
{
continue;
}
/* Don't allow any foreign keys from other schemas */
ereport(ERROR, (errmsg("cannot create foreign keys to tables in a distributed "
"schema from another schema"),
errdetail("\"%s\" references \"%s\" via foreign key "
"constraint \"%s\"",
generate_qualified_relation_name(referencingTableId),
generate_qualified_relation_name(referencedTableId),
get_constraint_name(foreignKeyId))));
}
}
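The two loops above boil down to these user-visible rules; all object names are hypothetical:
ALTER TABLE tenant_a.orders ADD FOREIGN KEY (user_id) REFERENCES tenant_a.users (id);     -- allowed, same distributed schema
ALTER TABLE tenant_a.users ADD FOREIGN KEY (country) REFERENCES public.countries (code);  -- allowed only if public.countries is a reference table
ALTER TABLE public.audit_log ADD FOREIGN KEY (user_id) REFERENCES tenant_a.users (id);    -- rejected, references a tenant table from outside the schema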
/*
* CreateTenantSchemaTable creates a tenant table with given relationId.
*
* This means creating a single shard distributed table without a shard
* key and colocating it with the other tables in its schema.
*/
void
CreateTenantSchemaTable(Oid relationId)
{
if (!IsCoordinator())
{
/*
* We don't support creating tenant tables from workers. We could
* let ShouldCreateTenantSchemaTable() return false to allow users
* to create a local table as usual but that would be confusing because
* it might sound like we allow creating tenant tables from workers.
* For this reason, we prefer to throw an error instead.
*
* Indeed, CreateSingleShardTable() would already do so but we
* prefer to throw an error with a more meaningful message, rather
* than saying "operation is not allowed on this node".
*/
ereport(ERROR, (errmsg("cannot create tables in a distributed schema from "
"a worker node"),
errhint("Connect to the coordinator node and try again.")));
}
EnsureTableKindSupportedForTenantSchema(relationId);
/*
* We don't expect this to happen because ShouldCreateTenantSchemaTable()
* should've already verified that; but better to check.
*/
Oid schemaId = get_rel_namespace(relationId);
uint32 colocationId = SchemaIdGetTenantColocationId(schemaId);
if (colocationId == INVALID_COLOCATION_ID)
{
ereport(ERROR, (errmsg("schema \"%s\" is not distributed",
get_namespace_name(schemaId))));
}
ColocationParam colocationParam = {
.colocationParamType = COLOCATE_WITH_COLOCATION_ID,
.colocationId = colocationId,
};
CreateSingleShardTable(relationId, colocationParam);
}
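From a client session on the coordinator this looks roughly as follows (names invented):
SET citus.enable_schema_based_sharding TO on;
CREATE SCHEMA tenant_b;
CREATE TABLE tenant_b.users (id bigint PRIMARY KEY);  -- created as a single-shard table
CREATE TABLE tenant_b.orders (user_id bigint);        -- colocated with tenant_b.users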
/*
* ErrorIfIllegalPartitioningInTenantSchema throws an error if the
* partitioning relationship between the parent and the child is illegal
* because they are in different schemas while one of them is a tenant table.
*
* This function assumes that either the parent or the child are in a tenant
* schema.
*/
void
ErrorIfIllegalPartitioningInTenantSchema(Oid parentRelationId, Oid partitionRelationId)
{
if (get_rel_namespace(partitionRelationId) != get_rel_namespace(parentRelationId))
{
ereport(ERROR, (errmsg("partitioning within a distributed schema is not "
"supported when the parent and the child "
"are in different schemas")));
}
}
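A quick sketch of the restriction with made-up names: the parent and its partitions must live in the same distributed schema.
CREATE TABLE tenant_a.events (day date) PARTITION BY RANGE (day);
CREATE TABLE tenant_a.events_2023 PARTITION OF tenant_a.events
FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');   -- accepted, same schema
CREATE TABLE public.events_2024 PARTITION OF tenant_a.events
FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');   -- ERROR, crosses schema boundaries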
/*
* CreateTenantSchemaColocationId returns new colocation id for a tenant schema.
*/
uint32
CreateTenantSchemaColocationId(void)
{
int shardCount = 1;
int replicationFactor = 1;
Oid distributionColumnType = InvalidOid;
Oid distributionColumnCollation = InvalidOid;
uint32 schemaColocationId = CreateColocationGroup(
shardCount, replicationFactor, distributionColumnType,
distributionColumnCollation);
return schemaColocationId;
}
/*
* SchemaGetNonShardTableIdList returns all nonshard relation ids
* inside given schema.
*/
static List *
SchemaGetNonShardTableIdList(Oid schemaId)
{
List *relationIdList = NIL;
/* scan all relations in pg_class and return all tables inside given schema */
Relation relationRelation = relation_open(RelationRelationId, AccessShareLock);
ScanKeyData scanKey[1] = { 0 };
ScanKeyInit(&scanKey[0], Anum_pg_class_relnamespace, BTEqualStrategyNumber,
F_OIDEQ, ObjectIdGetDatum(schemaId));
SysScanDesc scanDescriptor = systable_beginscan(relationRelation, ClassNameNspIndexId,
true, NULL, 1, scanKey);
HeapTuple heapTuple = NULL;
while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
{
Form_pg_class relationForm = (Form_pg_class) GETSTRUCT(heapTuple);
char *relationName = NameStr(relationForm->relname);
Oid relationId = get_relname_relid(relationName, schemaId);
if (!OidIsValid(relationId))
{
ereport(ERROR, errmsg("table %s is dropped by a concurrent operation",
relationName));
}
/* skip shards */
if (RelationIsAKnownShard(relationId))
{
continue;
}
if (RegularTable(relationId) || PartitionTable(relationId) ||
IsForeignTable(relationId))
{
relationIdList = lappend_oid(relationIdList, relationId);
}
}
systable_endscan(scanDescriptor);
relation_close(relationRelation, AccessShareLock);
return relationIdList;
}
/*
* EnsureSchemaCanBeDistributed ensures the schema can be distributed.
* Caller should take the required locks on the relations and the schema.
*
* It checks:
* - Schema name is in the allowed-list,
* - Schema does not depend on an extension (created by extension),
* - No extension depends on the schema (CREATE EXTENSION <ext> SCHEMA <schema>),
* - Some checks for the table for being a valid tenant table.
*/
static void
EnsureSchemaCanBeDistributed(Oid schemaId, List *schemaTableIdList)
{
/* Ensure schema name is allowed */
EnsureTenantSchemaNameAllowed(schemaId);
/* Any schema owned by extension is not allowed */
char *schemaName = get_namespace_name(schemaId);
ObjectAddress *schemaAddress = palloc0(sizeof(ObjectAddress));
ObjectAddressSet(*schemaAddress, NamespaceRelationId, schemaId);
if (IsAnyObjectAddressOwnedByExtension(list_make1(schemaAddress), NULL))
{
ereport(ERROR, (errmsg("schema %s, which is owned by an extension, cannot "
"be distributed", schemaName)));
}
/* Extension schemas are not allowed */
ObjectAddress *extensionAddress = FirstExtensionWithSchema(schemaId);
if (extensionAddress)
{
char *extensionName = get_extension_name(extensionAddress->objectId);
ereport(ERROR, (errmsg("schema %s cannot be distributed since it is the schema "
"of extension %s", schemaName, extensionName)));
}
Oid relationId = InvalidOid;
foreach_oid(relationId, schemaTableIdList)
{
EnsureTenantTable(relationId, "citus_schema_distribute");
}
}
/*
* EnsureTenantTable ensures the table can be a valid tenant table.
* - Current user should be the owner of table,
* - Table kind is supported,
* - Referencing and referenced foreign keys for the table are supported,
* - Table is not owned by an extension,
* - Table should be Citus local or Postgres local table.
*/
void
EnsureTenantTable(Oid relationId, char *operationName)
{
/* Ensure table owner */
EnsureTableOwner(relationId);
/* Check relation kind */
EnsureTableKindSupportedForTenantSchema(relationId);
/* Check foreign keys */
EnsureFKeysForTenantTable(relationId);
/* Check table not owned by an extension */
ObjectAddress *tableAddress = palloc0(sizeof(ObjectAddress));
ObjectAddressSet(*tableAddress, RelationRelationId, relationId);
if (IsAnyObjectAddressOwnedByExtension(list_make1(tableAddress), NULL))
{
char *tableName = get_rel_name(relationId);
ereport(ERROR, (errmsg("schema cannot be distributed since it has "
"table %s which is owned by an extension",
tableName)));
}
/* Postgres local tables are allowed */
if (!IsCitusTable(relationId))
{
return;
}
/* Only Citus local tables, amongst Citus table types, are allowed */
if (!IsCitusTableType(relationId, CITUS_LOCAL_TABLE))
{
ereport(ERROR, (errmsg("distributed schema cannot have distributed tables"),
errhint("Undistribute distributed tables before "
"'%s'.", operationName)));
}
}
/*
* EnsureTenantSchemaNameAllowed ensures that the given schema's name is allowed
* for registering the schema as a tenant schema.
*/
static void
EnsureTenantSchemaNameAllowed(Oid schemaId)
{
char *schemaName = get_namespace_name(schemaId);
/* public schema is not allowed */
if (strcmp(schemaName, "public") == 0)
{
ereport(ERROR, (errmsg("public schema cannot be distributed")));
}
/* information_schema schema is not allowed */
if (strcmp(schemaName, "information_schema") == 0)
{
ereport(ERROR, (errmsg("information_schema schema cannot be distributed")));
}
/* pg_temp_xx and pg_toast_temp_xx schemas are not allowed */
if (isAnyTempNamespace(schemaId))
{
ereport(ERROR, (errmsg("temporary schema cannot be distributed")));
}
/* pg_catalog schema is not allowed */
if (IsCatalogNamespace(schemaId))
{
ereport(ERROR, (errmsg("pg_catalog schema cannot be distributed")));
}
/* pg_toast schema is not allowed */
if (IsToastNamespace(schemaId))
{
ereport(ERROR, (errmsg("pg_toast schema cannot be distributed")));
}
}
/*
* EnsureSchemaExist ensures that the schema exists. Caller is responsible for
* taking the required lock on the schema.
*/
static void
EnsureSchemaExist(Oid schemaId)
{
if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaId)))
{
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA),
errmsg("schema with OID %u does not exist", schemaId)));
}
}
/*
* UnregisterTenantSchemaGlobally removes given schema from the tenant schema
* metadata table, deletes the colocation group of the schema and sends the
* command to do the same on the workers.
*/
static void
UnregisterTenantSchemaGlobally(Oid schemaId, char *schemaName)
{
uint32 tenantSchemaColocationId = SchemaIdGetTenantColocationId(schemaId);
DeleteTenantSchemaLocally(schemaId);
if (EnableMetadataSync)
{
SendCommandToWorkersWithMetadata(TenantSchemaDeleteCommand(schemaName));
}
DeleteColocationGroup(tenantSchemaColocationId);
}
/*
* citus_internal_unregister_tenant_schema_globally, called by Citus drop hook,
* unregisters the schema when a tenant schema is dropped.
*
* NOTE: We need to pass schema_name as an argument. We cannot use schema id
* to obtain schema name since the schema would have already been dropped when this
* udf is called by the drop hook.
*/
Datum
citus_internal_unregister_tenant_schema_globally(PG_FUNCTION_ARGS)
{
PG_ENSURE_ARGNOTNULL(0, "schema_id");
Oid schemaId = PG_GETARG_OID(0);
PG_ENSURE_ARGNOTNULL(1, "schema_name");
text *schemaName = PG_GETARG_TEXT_PP(1);
char *schemaNameStr = text_to_cstring(schemaName);
/*
* Skip on workers because we expect this to be called from the coordinator
* only via drop hook.
*/
if (!IsCoordinator())
{
PG_RETURN_VOID();
}
/* make sure that the schema is dropped already */
HeapTuple namespaceTuple = SearchSysCache1(NAMESPACEOID, ObjectIdGetDatum(schemaId));
if (HeapTupleIsValid(namespaceTuple))
{
ReleaseSysCache(namespaceTuple);
ereport(ERROR, (errmsg("schema is expected to be already dropped "
"because this function is only expected to "
"be called from Citus drop hook")));
}
UnregisterTenantSchemaGlobally(schemaId, schemaNameStr);
PG_RETURN_VOID();
}
/*
* citus_schema_distribute gets a regular schema name, then converts it to a tenant
* schema.
*/
Datum
citus_schema_distribute(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
EnsureCoordinator();
Oid schemaId = PG_GETARG_OID(0);
EnsureSchemaExist(schemaId);
EnsureSchemaOwner(schemaId);
/* Prevent concurrent table creation under the schema */
LockDatabaseObject(NamespaceRelationId, schemaId, 0, AccessExclusiveLock);
/*
* We should ensure the existence of the schema after taking the lock since
* the schema could have been dropped before we acquired the lock.
*/
EnsureSchemaExist(schemaId);
EnsureSchemaOwner(schemaId);
/* Return if the schema is already a tenant schema */
char *schemaName = get_namespace_name(schemaId);
if (IsTenantSchema(schemaId))
{
ereport(NOTICE, (errmsg("schema %s is already distributed", schemaName)));
PG_RETURN_VOID();
}
/* Take lock on the relations and filter out partition tables */
List *tableIdListInSchema = SchemaGetNonShardTableIdList(schemaId);
List *tableIdListToConvert = NIL;
Oid relationId = InvalidOid;
foreach_oid(relationId, tableIdListInSchema)
{
/* prevent concurrent drop of the relation */
LockRelationOid(relationId, AccessShareLock);
EnsureRelationExists(relationId);
/*
* Skip partitions as they would be distributed by the parent table.
*
* We should filter out partitions here before distributing the schema.
* Otherwise, converting the partitioned table would change the oids of its
* partitions, and converting those partitions afterwards would fail because
* their old oids no longer exist.
*/
if (PartitionTable(relationId))
{
continue;
}
tableIdListToConvert = lappend_oid(tableIdListToConvert, relationId);
}
/* Makes sure the schema can be distributed. */
EnsureSchemaCanBeDistributed(schemaId, tableIdListInSchema);
ereport(NOTICE, (errmsg("distributing the schema %s", schemaName)));
/* Create colocation id and then single shard tables with the colocation id */
uint32 colocationId = CreateTenantSchemaColocationId();
ColocationParam colocationParam = {
.colocationParamType = COLOCATE_WITH_COLOCATION_ID,
.colocationId = colocationId,
};
/*
* Collect foreign keys for recreation and then drop fkeys and create single shard
* tables.
*/
List *originalForeignKeyRecreationCommands = NIL;
foreach_oid(relationId, tableIdListToConvert)
{
List *fkeyCommandsForRelation =
GetFKeyCreationCommandsRelationInvolvedWithTableType(relationId,
INCLUDE_ALL_TABLE_TYPES);
originalForeignKeyRecreationCommands = list_concat(
originalForeignKeyRecreationCommands, fkeyCommandsForRelation);
DropFKeysRelationInvolvedWithTableType(relationId, INCLUDE_ALL_TABLE_TYPES);
CreateSingleShardTable(relationId, colocationParam);
}
/* We can skip foreign key validations as we are sure about them at start */
bool skip_validation = true;
ExecuteForeignKeyCreateCommandList(originalForeignKeyRecreationCommands,
skip_validation);
/* Register the schema locally and sync it to workers */
InsertTenantSchemaLocally(schemaId, colocationId);
char *registerSchemaCommand = TenantSchemaInsertCommand(schemaId, colocationId);
if (EnableMetadataSync)
{
SendCommandToWorkersWithMetadata(registerSchemaCommand);
}
PG_RETURN_VOID();
}
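Typical usage, assuming an existing regular schema named sales that only contains local tables:
SELECT citus_schema_distribute('sales');  -- every table becomes a colocated single-shard table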
/*
* citus_schema_undistribute gets a tenant schema name, then converts it to a regular
* schema by undistributing all tables under it.
*/
Datum
citus_schema_undistribute(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
EnsureCoordinator();
Oid schemaId = PG_GETARG_OID(0);
EnsureSchemaExist(schemaId);
EnsureSchemaOwner(schemaId);
/* Prevent concurrent table creation under the schema */
LockDatabaseObject(NamespaceRelationId, schemaId, 0, AccessExclusiveLock);
/*
* We should ensure the existence of the schema after taking the lock since
* the schema could have been dropped before we acquired the lock.
*/
EnsureSchemaExist(schemaId);
EnsureSchemaOwner(schemaId);
/* The schema should be a tenant schema */
char *schemaName = get_namespace_name(schemaId);
if (!IsTenantSchema(schemaId))
{
ereport(ERROR, (errmsg("schema %s is not distributed", schemaName)));
}
ereport(NOTICE, (errmsg("undistributing schema %s", schemaName)));
/* Take lock on the relations and filter out partition tables */
List *tableIdListInSchema = SchemaGetNonShardTableIdList(schemaId);
List *tableIdListToConvert = NIL;
Oid relationId = InvalidOid;
foreach_oid(relationId, tableIdListInSchema)
{
/* prevent concurrent drop of the relation */
LockRelationOid(relationId, AccessShareLock);
EnsureRelationExists(relationId);
/*
* Skip partitions as they would be undistributed by the parent table.
*
* We should filter out partitions here before undistributing the schema.
* Otherwise, converting the partitioned table would change the oids of its
* partitions, and converting those partitions afterwards would fail because
* their old oids no longer exist.
*/
if (PartitionTable(relationId))
{
continue;
}
tableIdListToConvert = lappend_oid(tableIdListToConvert, relationId);
/* Only single shard tables are expected during the undistribution of the schema */
Assert(IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED));
}
/*
* First, we need to delete schema metadata and sync it to workers. Otherwise,
* we would get an error from `ErrorIfTenantTable` while undistributing the tables.
*/
UnregisterTenantSchemaGlobally(schemaId, schemaName);
UndistributeTables(tableIdListToConvert);
PG_RETURN_VOID();
}
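And the inverse operation, again with an assumed schema name:
SELECT citus_schema_undistribute('sales');  -- tables become regular coordinator-local tables again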
/*
* ErrorIfTenantTable errors out with the given operation name,
* if the given relation is a tenant table.
*/
void
ErrorIfTenantTable(Oid relationId, const char *operationName)
{
if (IsTenantSchema(get_rel_namespace(relationId)))
{
ereport(ERROR, (errmsg("%s is not allowed for %s because it belongs to "
"a distributed schema",
generate_qualified_relation_name(relationId),
operationName)));
}
}
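For example (table name assumed), the operations listed in TenantOperationNames are rejected on tenant tables:
SELECT undistribute_table('tenant_a.users');
-- ERROR: tenant_a.users is not allowed for undistribute_table because it belongs to a distributed schema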

View File

@ -222,7 +222,7 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList,
bool
ColumnDefaultsToNextVal(Oid relationId, AttrNumber attrNumber)
{
Assert(AttributeNumberIsValid(attrNumber));
Relation relation = RelationIdGetRelation(relationId);
Node *defExpr = build_column_default(relation, attrNumber);
@ -668,7 +668,7 @@ PreprocessAlterSequenceOwnerStmt(Node *node, const char *queryString,
ProcessUtilityContext processUtilityContext)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(stmt->objtype == OBJECT_SEQUENCE);
List *sequenceAddresses = GetObjectAddressListFromParseTree((Node *) stmt, false,
false);
@ -701,7 +701,7 @@ List *
AlterSequenceOwnerStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(stmt->objtype == OBJECT_SEQUENCE);
RangeVar *sequence = stmt->relation;
Oid seqOid = RangeVarGetRelid(sequence, NoLock, missing_ok);
@ -721,7 +721,7 @@ List *
PostprocessAlterSequenceOwnerStmt(Node *node, const char *queryString)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(stmt->objtype == OBJECT_SEQUENCE);
List *sequenceAddresses = GetObjectAddressListFromParseTree((Node *) stmt, false,
true);
@ -755,7 +755,7 @@ PreprocessAlterSequencePersistenceStmt(Node *node, const char *queryString,
ProcessUtilityContext processUtilityContext)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(stmt->objtype == OBJECT_SEQUENCE);
List *sequenceAddresses = GetObjectAddressListFromParseTree((Node *) stmt, false,
false);
@ -788,7 +788,7 @@ List *
AlterSequencePersistenceStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(stmt->objtype == OBJECT_SEQUENCE);
RangeVar *sequence = stmt->relation;
Oid seqOid = RangeVarGetRelid(sequence, NoLock, missing_ok);
@ -811,7 +811,7 @@ PreprocessSequenceAlterTableStmt(Node *node, const char *queryString,
ProcessUtilityContext processUtilityContext)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(stmt->objtype == OBJECT_SEQUENCE);
ListCell *cmdCell = NULL;
foreach(cmdCell, stmt->cmds)

View File

@ -41,6 +41,7 @@
#include "distributed/resource_lock.h"
#include "distributed/version_compat.h"
#include "distributed/worker_shard_visibility.h"
#include "distributed/tenant_schema_metadata.h"
#include "foreign/foreign.h"
#include "lib/stringinfo.h"
#include "nodes/parsenodes.h"
@ -229,6 +230,17 @@ PostprocessCreateTableStmt(CreateStmt *createStatement, const char *queryString)
{
PostprocessCreateTableStmtForeignKeys(createStatement);
bool missingOk = false;
Oid relationId = RangeVarGetRelid(createStatement->relation, NoLock, missingOk);
Oid schemaId = get_rel_namespace(relationId);
if (createStatement->ofTypename && IsTenantSchema(schemaId))
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot create tables in a distributed schema using "
"CREATE TABLE OF syntax")));
}
if (createStatement->inhRelations != NIL)
{
if (createStatement->partbound != NULL)
@ -239,15 +251,31 @@ PostprocessCreateTableStmt(CreateStmt *createStatement, const char *queryString)
else
{
/* process CREATE TABLE ... INHERITS ... */
if (IsTenantSchema(schemaId))
{
ereport(ERROR, (errmsg("tables in a distributed schema cannot inherit "
"or be inherited")));
}
RangeVar *parentRelation = NULL;
foreach_ptr(parentRelation, createStatement->inhRelations)
{
bool missingOk = false;
Oid parentRelationId = RangeVarGetRelid(parentRelation, NoLock,
missingOk);
Assert(parentRelationId != InvalidOid);
if (IsCitusTable(parentRelationId))
/*
* Throw a better error message if the user tries to inherit a
* tenant table or if the user tries to inherit from a tenant
* table.
*/
if (IsTenantSchema(get_rel_namespace(parentRelationId)))
{
ereport(ERROR, (errmsg("tables in a distributed schema cannot "
"inherit or be inherited")));
}
else if (IsCitusTable(parentRelationId))
{
/* here we error out if inheriting a distributed table */
ereport(ERROR, (errmsg("non-distributed tables cannot inherit "
@ -282,6 +310,15 @@ PostprocessCreateTableStmtForeignKeys(CreateStmt *createStatement)
bool missingOk = false;
Oid relationId = RangeVarGetRelid(createStatement->relation, NoLock, missingOk);
if (ShouldCreateTenantSchemaTable(relationId))
{
/*
* Avoid unnecessarily adding the table into metadata if we will
* distribute it as a tenant table later.
*/
return;
}
/*
* As we are just creating the table, there cannot yet be foreign keys in which our
* relation is referenced. So we use INCLUDE_REFERENCING_CONSTRAINTS here.
@ -378,12 +415,22 @@ PostprocessCreateTableStmtPartitionOf(CreateStmt *createStatement, const
}
}
if (IsTenantSchema(get_rel_namespace(parentRelationId)) ||
IsTenantSchema(get_rel_namespace(relationId)))
{
ErrorIfIllegalPartitioningInTenantSchema(parentRelationId, relationId);
}
/*
* If a partition is being created and if its parent is a distributed
* table, we will distribute this table as well.
*/
if (IsCitusTable(parentRelationId))
{
/*
* We can create Citus local tables right away, without switching to
* sequential mode, because they are going to have only one shard.
*/
if (IsCitusTableType(parentRelationId, CITUS_LOCAL_TABLE))
{
CreateCitusLocalTablePartitionOf(createStatement, relationId,
@ -391,18 +438,7 @@ PostprocessCreateTableStmtPartitionOf(CreateStmt *createStatement, const
return;
}
Var *parentDistributionColumn = DistPartitionKeyOrError(parentRelationId);
char *distributionColumnName =
ColumnToColumnName(parentRelationId, (Node *) parentDistributionColumn);
char parentDistributionMethod = DISTRIBUTE_BY_HASH;
char *parentRelationName = generate_qualified_relation_name(parentRelationId);
SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(parentRelationId,
relationId);
CreateDistributedTable(relationId, distributionColumnName,
parentDistributionMethod, ShardCount, false,
parentRelationName);
DistributePartitionUsingParent(parentRelationId, relationId);
}
}
@ -465,6 +501,13 @@ PreprocessAlterTableStmtAttachPartition(AlterTableStmt *alterTableStatement,
return NIL;
}
if (IsTenantSchema(get_rel_namespace(parentRelationId)) ||
IsTenantSchema(get_rel_namespace(partitionRelationId)))
{
ErrorIfIllegalPartitioningInTenantSchema(parentRelationId,
partitionRelationId);
}
if (!IsCitusTable(parentRelationId))
{
/*
@ -589,19 +632,45 @@ PreprocessAttachCitusPartitionToCitusTable(Oid parentCitusRelationId, Oid
/*
* DistributePartitionUsingParent takes a parent and a partition relation and
* distributes the partition, using the same distribution column as the parent.
* It creates a *hash* distributed table by default, as partitioned tables can only be
* distributed by hash.
* distributes the partition, using the same distribution column as the parent, if the
* parent has a distribution column. It creates a *hash* distributed table by default, as
* partitioned tables can only be distributed by hash, unless it's null key distributed.
*
* If the parent has no distribution key, we distribute the partition with null key too.
*/
static void
DistributePartitionUsingParent(Oid parentCitusRelationId, Oid partitionRelationId)
{
char *parentRelationName = generate_qualified_relation_name(parentCitusRelationId);
/*
* We can create tenant tables and single shard tables right away, without
* switching to sequential mode, because they are going to have only one shard.
*/
if (ShouldCreateTenantSchemaTable(partitionRelationId))
{
CreateTenantSchemaTable(partitionRelationId);
return;
}
else if (!HasDistributionKey(parentCitusRelationId))
{
/*
* If the parent is null key distributed, we should distribute the partition
* with null distribution key as well.
*/
ColocationParam colocationParam = {
.colocationParamType = COLOCATE_WITH_TABLE_LIKE_OPT,
.colocateWithTableName = parentRelationName,
};
CreateSingleShardTable(partitionRelationId, colocationParam);
return;
}
Var *distributionColumn = DistPartitionKeyOrError(parentCitusRelationId);
char *distributionColumnName = ColumnToColumnName(parentCitusRelationId,
(Node *) distributionColumn);
char distributionMethod = DISTRIBUTE_BY_HASH;
char *parentRelationName = generate_qualified_relation_name(parentCitusRelationId);
SwitchToSequentialAndLocalExecutionIfPartitionNameTooLong(
parentCitusRelationId, partitionRelationId);
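For illustration, a minimal SQL sketch of the single shard branch added above; table names are made up and it assumes the NULL distribution column syntax for creating single shard tables:
CREATE TABLE events (tenant_id bigint, at timestamptz NOT NULL) PARTITION BY RANGE (at);
SELECT create_distributed_table('events', NULL);  -- parent becomes a single shard table
CREATE TABLE events_2023 PARTITION OF events
    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
-- DistributePartitionUsingParent distributes events_2023 as a single shard table
-- colocated with its parent, since the parent has no distribution key.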
@ -1066,7 +1135,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
if (relKind == RELKIND_SEQUENCE)
{
AlterTableStmt *stmtCopy = copyObject(alterTableStatement);
AlterTableStmtObjType_compat(stmtCopy) = OBJECT_SEQUENCE;
stmtCopy->objtype = OBJECT_SEQUENCE;
#if (PG_VERSION_NUM >= PG_VERSION_15)
/*
@ -1096,7 +1165,7 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
* passes through an AlterTableStmt
*/
AlterTableStmt *stmtCopy = copyObject(alterTableStatement);
AlterTableStmtObjType_compat(stmtCopy) = OBJECT_VIEW;
stmtCopy->objtype = OBJECT_VIEW;
return PreprocessAlterViewStmt((Node *) stmtCopy, alterTableCommand,
processUtilityContext);
}
@ -1314,6 +1383,16 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
constraintName, missingOk);
rightRelationId = GetReferencedTableId(foreignKeyId);
}
/*
* We support deparsing for DROP CONSTRAINT, but currently deparsing is only
* possible if all subcommands are supported.
*/
if (list_length(commandList) == 1 &&
alterTableStatement->objtype == OBJECT_TABLE)
{
deparseAT = true;
}
}
else if (alterTableType == AT_AddColumn)
{
@ -1521,11 +1600,10 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
DDLJob *ddlJob = palloc0(sizeof(DDLJob));
ObjectAddressSet(ddlJob->targetObjectAddress, RelationRelationId, leftRelationId);
const char *sqlForTaskList = alterTableCommand;
if (deparseAT)
{
newStmt->cmds = list_make1(newCmd);
sqlForTaskList = DeparseTreeNode((Node *) newStmt);
alterTableCommand = DeparseTreeNode((Node *) newStmt);
}
ddlJob->metadataSyncCommand = useInitialDDLCommandString ? alterTableCommand : NULL;
@ -1541,13 +1619,13 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand,
{
/* if foreign key or attaching partition index related, use specialized task list function ... */
ddlJob->taskList = InterShardDDLTaskList(leftRelationId, rightRelationId,
sqlForTaskList);
alterTableCommand);
}
}
else
{
/* ... otherwise use standard DDL task list function */
ddlJob->taskList = DDLTaskList(leftRelationId, sqlForTaskList);
ddlJob->taskList = DDLTaskList(leftRelationId, alterTableCommand);
if (!propagateCommandToWorkers)
{
ddlJob->taskList = NIL;
@ -2233,6 +2311,53 @@ PreprocessAlterTableSchemaStmt(Node *node, const char *queryString,
return NIL;
}
Oid oldSchemaId = get_rel_namespace(relationId);
Oid newSchemaId = get_namespace_oid(stmt->newschema, stmt->missing_ok);
if (!OidIsValid(oldSchemaId) || !OidIsValid(newSchemaId))
{
return NIL;
}
/* Do nothing if new schema is the same as old schema */
if (newSchemaId == oldSchemaId)
{
return NIL;
}
/* Undistribute table if its old schema is a tenant schema */
if (IsTenantSchema(oldSchemaId) && IsCoordinator())
{
EnsureUndistributeTenantTableSafe(relationId,
TenantOperationNames[TENANT_SET_SCHEMA]);
char *oldSchemaName = get_namespace_name(oldSchemaId);
char *tableName = stmt->relation->relname;
ereport(NOTICE, (errmsg("undistributing table %s in distributed schema %s "
"before altering its schema", tableName, oldSchemaName)));
/* Undistribute the tenant table while suppressing the confusing notices it would emit */
TableConversionParameters params = {
.relationId = relationId,
.cascadeViaForeignKeys = false,
.bypassTenantCheck = true,
.suppressNoticeMessages = true,
};
UndistributeTable(&params);
/* relation id changes after undistribute_table */
relationId = get_relname_relid(tableName, oldSchemaId);
/*
* After undistribution, the table could be a Citus table or a plain Postgres table.
* If it is a Postgres table, do not propagate the `ALTER TABLE SET SCHEMA`
* command to workers.
*/
if (!IsCitusTable(relationId))
{
return NIL;
}
}
DDLJob *ddlJob = palloc0(sizeof(DDLJob));
QualifyTreeNode((Node *) stmt);
ObjectAddressSet(ddlJob->targetObjectAddress, RelationRelationId, relationId);
@ -2396,13 +2521,13 @@ PostprocessAlterTableStmt(AlterTableStmt *alterTableStatement)
char relKind = get_rel_relkind(relationId);
if (relKind == RELKIND_SEQUENCE)
{
AlterTableStmtObjType_compat(alterTableStatement) = OBJECT_SEQUENCE;
alterTableStatement->objtype = OBJECT_SEQUENCE;
PostprocessAlterSequenceOwnerStmt((Node *) alterTableStatement, NULL);
return;
}
else if (relKind == RELKIND_VIEW)
{
AlterTableStmtObjType_compat(alterTableStatement) = OBJECT_VIEW;
alterTableStatement->objtype = OBJECT_VIEW;
PostprocessAlterViewStmt((Node *) alterTableStatement, NULL);
return;
}
@ -3392,7 +3517,6 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
break;
}
#if PG_VERSION_NUM >= PG_VERSION_14
case AT_DetachPartitionFinalize:
{
ereport(ERROR, (errmsg("ALTER TABLE .. DETACH PARTITION .. FINALIZE "
@ -3400,7 +3524,6 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
break;
}
#endif
case AT_DetachPartition:
{
/* we only allow partitioning commands if they are only subcommand */
@ -3412,7 +3535,7 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
errhint("You can issue each subcommand "
"separately.")));
}
#if PG_VERSION_NUM >= PG_VERSION_14
PartitionCmd *partitionCommand = (PartitionCmd *) command->def;
if (partitionCommand->concurrent)
@ -3421,7 +3544,6 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
"CONCURRENTLY commands are currently "
"unsupported.")));
}
#endif
break;
}
@ -3464,9 +3586,7 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
case AT_NoForceRowSecurity:
case AT_ValidateConstraint:
case AT_DropConstraint: /* we do the check for invalidation in AlterTableDropsForeignKey */
#if PG_VERSION_NUM >= PG_VERSION_14
case AT_SetCompression:
#endif
{
/*
* We will not perform any special check for:
@ -3977,36 +4097,6 @@ MakeNameListFromRangeVar(const RangeVar *rel)
}
/*
* ErrorIfTableHasUnsupportedIdentityColumn errors out if the given table has any identity column other than bigint identity column.
*/
void
ErrorIfTableHasUnsupportedIdentityColumn(Oid relationId)
{
Relation relation = relation_open(relationId, AccessShareLock);
TupleDesc tupleDescriptor = RelationGetDescr(relation);
for (int attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
attributeIndex++)
{
Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, attributeIndex);
if (attributeForm->attidentity && attributeForm->atttypid != INT8OID)
{
char *qualifiedRelationName = generate_qualified_relation_name(relationId);
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg(
"cannot complete operation on %s with smallint/int identity column",
qualifiedRelationName),
errhint(
"Use bigint identity column instead.")));
}
}
relation_close(relation, NoLock);
}
/*
* ErrorIfTableHasIdentityColumn errors out if the given table has identity column
*/
@ -4031,3 +4121,145 @@ ErrorIfTableHasIdentityColumn(Oid relationId)
relation_close(relation, NoLock);
}
/*
* ConvertNewTableIfNecessary converts the given table to a tenant schema
* table or a Citus managed table if necessary.
*
* Input node is expected to be a CreateStmt or a CreateTableAsStmt.
*/
void
ConvertNewTableIfNecessary(Node *createStmt)
{
/*
* Need to increment command counter so that next command
* can see the new table.
*/
CommandCounterIncrement();
if (IsA(createStmt, CreateTableAsStmt))
{
CreateTableAsStmt *createTableAsStmt = (CreateTableAsStmt *) createStmt;
bool missingOk = false;
Oid createdRelationId = RangeVarGetRelid(createTableAsStmt->into->rel,
NoLock, missingOk);
if (ShouldCreateTenantSchemaTable(createdRelationId))
{
/* do not try to convert the table if it already exists and IF NOT EXISTS syntax is used */
if (createTableAsStmt->if_not_exists && IsCitusTable(createdRelationId))
{
return;
}
CreateTenantSchemaTable(createdRelationId);
}
/*
* We simply ignore the tables created by using that syntax when using
* Citus managed tables.
*/
return;
}
CreateStmt *baseCreateTableStmt = (CreateStmt *) createStmt;
bool missingOk = false;
Oid createdRelationId = RangeVarGetRelid(baseCreateTableStmt->relation,
NoLock, missingOk);
/* do not try to convert the table if it already exists and IF NOT EXISTS syntax is used */
if (baseCreateTableStmt->if_not_exists && IsCitusTable(createdRelationId))
{
return;
}
/*
* Check ShouldCreateTenantSchemaTable() before ShouldAddNewTableToMetadata()
* because we don't want to unnecessarily add the table into metadata
* (as a Citus managed table) before distributing it as a tenant table.
*/
if (ShouldCreateTenantSchemaTable(createdRelationId))
{
/*
* We skip creating tenant schema table if the table is a partition
* table because in that case PostprocessCreateTableStmt() should've
* already created a tenant schema table from the partition table.
*/
if (!PartitionTable(createdRelationId))
{
CreateTenantSchemaTable(createdRelationId);
}
}
else if (ShouldAddNewTableToMetadata(createdRelationId))
{
/*
* Here we set autoConverted to false, since the user explicitly
* wants these tables to be added to metadata, by setting the
* GUC use_citus_managed_tables to true.
*/
bool autoConverted = false;
bool cascade = true;
CreateCitusLocalTable(createdRelationId, cascade, autoConverted);
}
}
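As a sketch of the Citus managed table path that ConvertNewTableIfNecessary covers, assuming the use_citus_managed_tables GUC referenced elsewhere in this diff (the table name is made up):
SET citus.use_citus_managed_tables TO on;
CREATE TABLE plain_events (id bigint, payload jsonb);
-- On a coordinator that is also added as a worker node, the new table is added to
-- Citus metadata as a Citus managed (local) table with autoConverted = false.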
/*
* ConvertToTenantTableIfNecessary converts given relation to a tenant table if its
* schema changed to a distributed schema.
*/
void
ConvertToTenantTableIfNecessary(AlterObjectSchemaStmt *stmt)
{
Assert(stmt->objectType == OBJECT_TABLE || stmt->objectType == OBJECT_FOREIGN_TABLE);
if (!IsCoordinator())
{
return;
}
/*
* We will let Postgres deal with missing_ok
*/
List *tableAddresses = GetObjectAddressListFromParseTree((Node *) stmt, true, true);
/* the code-path only supports a single object */
Assert(list_length(tableAddresses) == 1);
/* We have already asserted that we have exactly 1 address in the addresses. */
ObjectAddress *tableAddress = linitial(tableAddresses);
char relKind = get_rel_relkind(tableAddress->objectId);
if (relKind == RELKIND_SEQUENCE || relKind == RELKIND_VIEW)
{
return;
}
Oid relationId = tableAddress->objectId;
Oid schemaId = get_namespace_oid(stmt->newschema, stmt->missing_ok);
if (!OidIsValid(schemaId))
{
return;
}
/*
* Make the table a tenant table when its schema has actually changed. When the schema
* is unchanged, as in `ALTER TABLE <tbl> SET SCHEMA <same_schema>`, we detect that by
* checking that the table is still a single shard table (i.e. it was not undistributed
* at the `preprocess` step).
*/
if (!IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED) &&
IsTenantSchema(schemaId))
{
EnsureTenantTable(relationId, "ALTER TABLE SET SCHEMA");
char *schemaName = get_namespace_name(schemaId);
char *tableName = stmt->relation->relname;
ereport(NOTICE, (errmsg("Moving %s into distributed schema %s",
tableName, schemaName)));
CreateTenantSchemaTable(relationId);
}
}
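To illustrate the distributed (tenant) schema behaviors introduced above, a minimal SQL sketch; schema and table names are made up, and it assumes tenant_a has already been made a distributed schema via schema based sharding:
CREATE TABLE tenant_a.users (id bigint PRIMARY KEY);      -- converted to a tenant schema table
CREATE TABLE tenant_a.kids () INHERITS (public.parents);  -- ERROR: tables in a distributed schema
                                                           --        cannot inherit or be inherited
ALTER TABLE public.orders SET SCHEMA tenant_a;             -- NOTICE: Moving orders into distributed schema tenant_a
ALTER TABLE tenant_a.users SET SCHEMA public;              -- NOTICE: undistributing table users in distributed
                                                            --         schema tenant_a before altering its schema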

View File

@ -324,7 +324,8 @@ ExecuteTruncateStmtSequentialIfNecessary(TruncateStmt *command)
{
Oid relationId = RangeVarGetRelid(rangeVar, NoLock, failOK);
if (IsCitusTable(relationId) && !HasDistributionKey(relationId) &&
if ((IsCitusTableType(relationId, REFERENCE_TABLE) ||
IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) &&
TableReferenced(relationId))
{
char *relationName = get_rel_name(relationId);

View File

@ -350,7 +350,7 @@ List *
AlterTypeStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_TYPE);
Assert(stmt->objtype == OBJECT_TYPE);
TypeName *typeName = MakeTypeNameFromRangeVar(stmt->relation);
Oid typeOid = LookupTypeNameOid(NULL, typeName, missing_ok);
@ -549,7 +549,7 @@ CreateTypeDDLCommandsIdempotent(const ObjectAddress *typeAddress)
const char *username = GetUserNameFromId(GetTypeOwner(typeAddress->objectId), false);
initStringInfo(&buf);
appendStringInfo(&buf, ALTER_TYPE_OWNER_COMMAND,
getObjectIdentity_compat(typeAddress, false),
getObjectIdentity(typeAddress, false),
quote_identifier(username));
ddlCommands = lappend(ddlCommands, buf.data);

View File

@ -33,9 +33,6 @@
#include "access/attnum.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#if PG_VERSION_NUM < 140000
#include "access/xact.h"
#endif
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "citus_version.h"
@ -60,9 +57,6 @@
#include "distributed/maintenanced.h"
#include "distributed/multi_logical_replication.h"
#include "distributed/multi_partitioning_utils.h"
#if PG_VERSION_NUM < 140000
#include "distributed/metadata_cache.h"
#endif
#include "distributed/metadata_sync.h"
#include "distributed/metadata/distobject.h"
#include "distributed/multi_executor.h"
@ -107,16 +101,13 @@ static void ProcessUtilityInternal(PlannedStmt *pstmt,
struct QueryEnvironment *queryEnv,
DestReceiver *dest,
QueryCompletion *completionTag);
#if PG_VERSION_NUM >= 140000
static void set_indexsafe_procflags(void);
#endif
static char * CurrentSearchPath(void);
static void IncrementUtilityHookCountersIfNecessary(Node *parsetree);
static void PostStandardProcessUtility(Node *parsetree);
static void DecrementUtilityHookCountersIfNecessary(Node *parsetree);
static bool IsDropSchemaOrDB(Node *parsetree);
static bool ShouldCheckUndistributeCitusLocalTables(void);
static bool ShouldAddNewTableToMetadata(Node *parsetree);
/*
* ProcessUtilityParseTree is a convenience method to create a PlannedStmt out of
@ -132,7 +123,7 @@ ProcessUtilityParseTree(Node *node, const char *queryString, ProcessUtilityConte
plannedStmt->commandType = CMD_UTILITY;
plannedStmt->utilityStmt = node;
ProcessUtility_compat(plannedStmt, queryString, false, context, params, NULL, dest,
ProcessUtility(plannedStmt, queryString, false, context, params, NULL, dest,
completionTag);
}
@ -149,25 +140,19 @@ ProcessUtilityParseTree(Node *node, const char *queryString, ProcessUtilityConte
void
multi_ProcessUtility(PlannedStmt *pstmt,
const char *queryString,
#if PG_VERSION_NUM >= PG_VERSION_14
bool readOnlyTree,
#endif
ProcessUtilityContext context,
ParamListInfo params,
struct QueryEnvironment *queryEnv,
DestReceiver *dest,
QueryCompletion *completionTag)
{
Node *parsetree;
#if PG_VERSION_NUM >= PG_VERSION_14
if (readOnlyTree)
{
pstmt = copyObject(pstmt);
}
#endif
parsetree = pstmt->utilityStmt;
Node *parsetree = pstmt->utilityStmt;
if (IsA(parsetree, TransactionStmt))
{
@ -187,7 +172,9 @@ multi_ProcessUtility(PlannedStmt *pstmt,
IsA(parsetree, ExecuteStmt) ||
IsA(parsetree, PrepareStmt) ||
IsA(parsetree, DiscardStmt) ||
IsA(parsetree, DeallocateStmt))
IsA(parsetree, DeallocateStmt) ||
IsA(parsetree, DeclareCursorStmt) ||
IsA(parsetree, FetchStmt))
{
/*
* Skip additional checks for common commands that do not have any
@ -198,7 +185,7 @@ multi_ProcessUtility(PlannedStmt *pstmt,
* that state. Since we never need to intercept transaction statements,
* skip our checks and immediately fall into standard_ProcessUtility.
*/
PrevProcessUtility_compat(pstmt, queryString, false, context,
PrevProcessUtility(pstmt, queryString, false, context,
params, queryEnv, dest, completionTag);
return;
@ -243,7 +230,7 @@ multi_ProcessUtility(PlannedStmt *pstmt,
* Ensure that utility commands do not behave any differently until CREATE
* EXTENSION is invoked.
*/
PrevProcessUtility_compat(pstmt, queryString, false, context,
PrevProcessUtility(pstmt, queryString, false, context,
params, queryEnv, dest, completionTag);
return;
@ -275,7 +262,7 @@ multi_ProcessUtility(PlannedStmt *pstmt,
PG_TRY();
{
PrevProcessUtility_compat(pstmt, queryString, false, context,
PrevProcessUtility(pstmt, queryString, false, context,
params, queryEnv, dest, completionTag);
StoredProcedureLevel -= 1;
@ -309,7 +296,7 @@ multi_ProcessUtility(PlannedStmt *pstmt,
PG_TRY();
{
PrevProcessUtility_compat(pstmt, queryString, false, context,
PrevProcessUtility(pstmt, queryString, false, context,
params, queryEnv, dest, completionTag);
DoBlockLevel -= 1;
@ -344,26 +331,44 @@ multi_ProcessUtility(PlannedStmt *pstmt,
}
ResetConstraintDropped();
/*
* We're only interested in top-level CREATE TABLE commands
* to create a tenant schema table or a Citus managed table.
*/
if (context == PROCESS_UTILITY_TOPLEVEL &&
ShouldAddNewTableToMetadata(parsetree))
(IsA(parsetree, CreateStmt) ||
IsA(parsetree, CreateForeignTableStmt) ||
IsA(parsetree, CreateTableAsStmt)))
{
Node *createStmt = NULL;
if (IsA(parsetree, CreateTableAsStmt))
{
createStmt = parsetree;
}
else
{
/*
* Here we need to increment command counter so that next command
* can see the new table.
* We do not directly cast to CreateStmt, to guard against the case where
* the definition of CreateForeignTableStmt changes in the future.
*/
CommandCounterIncrement();
CreateStmt *createTableStmt = (CreateStmt *) parsetree;
Oid relationId = RangeVarGetRelid(createTableStmt->relation,
NoLock, false);
createStmt =
IsA(parsetree, CreateStmt) ? parsetree :
(Node *) &(((CreateForeignTableStmt *) parsetree)->base);
}
/*
* Here we set autoConverted to false, since the user explicitly
* wants these tables to be added to metadata, by setting the
* GUC use_citus_managed_tables to true.
*/
bool autoConverted = false;
bool cascade = true;
CreateCitusLocalTable(relationId, cascade, autoConverted);
ConvertNewTableIfNecessary(createStmt);
}
if (context == PROCESS_UTILITY_TOPLEVEL &&
IsA(parsetree, AlterObjectSchemaStmt))
{
AlterObjectSchemaStmt *alterSchemaStmt = castNode(AlterObjectSchemaStmt,
parsetree);
if (alterSchemaStmt->objectType == OBJECT_TABLE ||
alterSchemaStmt->objectType == OBJECT_FOREIGN_TABLE)
{
ConvertToTenantTableIfNecessary(alterSchemaStmt);
}
}
}
@ -496,7 +501,7 @@ ProcessUtilityInternal(PlannedStmt *pstmt,
VariableSetStmt *setStmt = (VariableSetStmt *) parsetree;
/* at present, we only implement the NONE and LOCAL behaviors */
AssertState(PropagateSetCommands == PROPSETCMD_NONE ||
Assert(PropagateSetCommands == PROPSETCMD_NONE ||
PropagateSetCommands == PROPSETCMD_LOCAL);
if (IsMultiStatementTransaction() && ShouldPropagateSetCommand(setStmt))
@ -630,8 +635,8 @@ ProcessUtilityInternal(PlannedStmt *pstmt,
if (IsA(parsetree, AlterTableStmt))
{
AlterTableStmt *alterTableStmt = (AlterTableStmt *) parsetree;
if (AlterTableStmtObjType_compat(alterTableStmt) == OBJECT_TABLE ||
AlterTableStmtObjType_compat(alterTableStmt) == OBJECT_FOREIGN_TABLE)
if (alterTableStmt->objtype == OBJECT_TABLE ||
alterTableStmt->objtype == OBJECT_FOREIGN_TABLE)
{
ErrorIfAlterDropsPartitionColumn(alterTableStmt);
@ -750,7 +755,7 @@ ProcessUtilityInternal(PlannedStmt *pstmt,
PreprocessAlterExtensionCitusStmtForCitusColumnar(parsetree);
}
PrevProcessUtility_compat(pstmt, queryString, false, context,
PrevProcessUtility(pstmt, queryString, false, context,
params, queryEnv, dest, completionTag);
if (isAlterExtensionUpdateCitusStmt)
@ -994,7 +999,8 @@ UndistributeDisconnectedCitusLocalTables(void)
TableConversionParameters params = {
.relationId = citusLocalTableId,
.cascadeViaForeignKeys = true,
.suppressNoticeMessages = true
.suppressNoticeMessages = true,
.bypassTenantCheck = false
};
UndistributeTable(&params);
}
@ -1060,60 +1066,6 @@ ShouldCheckUndistributeCitusLocalTables(void)
}
/*
* ShouldAddNewTableToMetadata takes a Node* and returns true if we need to add a
* newly created table to metadata, false otherwise.
* This function checks whether the given Node* is a CREATE TABLE statement.
* For partitions and temporary tables, ShouldAddNewTableToMetadata returns false.
* For other created tables, it returns true if we are on a coordinator that is added
* as a worker and, of course, if the GUC use_citus_managed_tables is set to on.
*/
static bool
ShouldAddNewTableToMetadata(Node *parsetree)
{
CreateStmt *createTableStmt;
if (IsA(parsetree, CreateStmt))
{
createTableStmt = (CreateStmt *) parsetree;
}
else if (IsA(parsetree, CreateForeignTableStmt))
{
CreateForeignTableStmt *createForeignTableStmt =
(CreateForeignTableStmt *) parsetree;
createTableStmt = (CreateStmt *) &(createForeignTableStmt->base);
}
else
{
/* if the command is not CREATE [FOREIGN] TABLE, we can early return false */
return false;
}
if (createTableStmt->relation->relpersistence == RELPERSISTENCE_TEMP ||
createTableStmt->partbound != NULL)
{
/*
* Shouldn't add table to metadata if it's a temp table, or a partition.
* Creating partitions of a table that is added to metadata is already handled.
*/
return false;
}
if (AddAllLocalTablesToMetadata && !IsBinaryUpgrade &&
IsCoordinator() && CoordinatorAddedAsWorkerNode())
{
/*
* We have verified that the GUC is set to true, and we are not upgrading,
* and we are on the coordinator that is added as worker node.
* So return true here, to add this newly created table to metadata.
*/
return true;
}
return false;
}
/*
* NotifyUtilityHookConstraintDropped sets ConstraintDropped to true to tell us
* last command dropped a table constraint.
@ -1242,38 +1194,6 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
*/
if (ddlJob->startNewTransaction)
{
#if PG_VERSION_NUM < 140000
/*
* Older versions of postgres doesn't have PROC_IN_SAFE_IC flag
* so we cannot use set_indexsafe_procflags in those versions.
*
* For this reason, we do our best to ensure not grabbing any
* snapshots later in the executor.
*/
/*
* If cache is not populated, system catalog lookups will cause
* the xmin of current backend to change. Then the last phase
* of CREATE INDEX CONCURRENTLY, which is in a separate backend,
* will hang waiting for our backend and result in a deadlock.
*
* We populate the cache before starting the next transaction to
* avoid this. Most of the metadata has already been resolved in
* planning phase, we only need to lookup metadata needed for
* connection establishment.
*/
(void) CurrentDatabaseName();
/*
* ConnParams (AuthInfo and PoolInfo) gets a snapshot, which
* will blocks the remote connections to localhost. Hence we warm up
* the cache here so that after we start a new transaction, the entries
* will already be in the hash table, hence we won't be holding any snapshots.
*/
WarmUpConnParamsHash();
#endif
/*
* Since it is not certain whether the code-path that we followed
* until reaching here caused grabbing any snapshots or not, we
@ -1292,8 +1212,6 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
CommitTransactionCommand();
StartTransactionCommand();
#if PG_VERSION_NUM >= 140000
/*
* Tell other backends to ignore us, even if we grab any
* snapshots via adaptive executor.
@ -1308,7 +1226,6 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
* given above.
*/
Assert(localExecutionSupported == false);
#endif
}
MemoryContext savedContext = CurrentMemoryContext;
@ -1374,8 +1291,6 @@ ExecuteDistributedDDLJob(DDLJob *ddlJob)
}
#if PG_VERSION_NUM >= 140000
/*
* set_indexsafe_procflags sets PROC_IN_SAFE_IC flag in MyProc->statusFlags.
*
@ -1398,9 +1313,6 @@ set_indexsafe_procflags(void)
}
#endif
/*
* CurrentSearchPath is a C interface for calling current_schemas(bool) that
* PostgreSQL exports.
@ -1591,18 +1503,19 @@ DDLTaskList(Oid relationId, const char *commandString)
List *
NodeDDLTaskList(TargetWorkerSet targets, List *commands)
{
DDLJob *ddlJob = palloc0(sizeof(DDLJob));
ddlJob->targetObjectAddress = InvalidObjectAddress;
ddlJob->metadataSyncCommand = NULL;
/* don't allow concurrent node list changes that require an exclusive lock */
List *workerNodes = TargetWorkerSetNodeList(targets, RowShareLock);
if (list_length(workerNodes) <= 0)
{
/*
* if there are no nodes we don't have to plan any ddl tasks. Planning them would
* cause the executor to stop responding.
*/
return NIL;
}
if (list_length(workerNodes) > 0)
{
Task *task = CitusMakeNode(Task);
task->taskType = DDL_TASK;
SetTaskQueryStringList(task, commands);
@ -1618,10 +1531,9 @@ NodeDDLTaskList(TargetWorkerSet targets, List *commands)
task->taskPlacementList = lappend(task->taskPlacementList, targetPlacement);
}
DDLJob *ddlJob = palloc0(sizeof(DDLJob));
ddlJob->targetObjectAddress = InvalidObjectAddress;
ddlJob->metadataSyncCommand = NULL;
ddlJob->taskList = list_make1(task);
}
return list_make1(ddlJob);
}

View File

@ -359,12 +359,12 @@ DeparseVacuumStmtPrefix(CitusVacuumParams vacuumParams)
{
appendStringInfoString(vacuumPrefix, "SKIP_LOCKED,");
}
#if PG_VERSION_NUM >= PG_VERSION_14
if (vacuumFlags & VACOPT_PROCESS_TOAST)
{
appendStringInfoString(vacuumPrefix, "PROCESS_TOAST,");
}
#endif
if (vacuumParams.truncate != VACOPTVALUE_UNSPECIFIED)
{
appendStringInfoString(vacuumPrefix,
@ -389,13 +389,11 @@ DeparseVacuumStmtPrefix(CitusVacuumParams vacuumParams)
break;
}
#if PG_VERSION_NUM >= PG_VERSION_14
case VACOPTVALUE_AUTO:
{
appendStringInfoString(vacuumPrefix, "INDEX_CLEANUP auto,");
break;
}
#endif
default:
{
@ -501,9 +499,7 @@ VacuumStmtParams(VacuumStmt *vacstmt)
bool freeze = false;
bool full = false;
bool disable_page_skipping = false;
#if PG_VERSION_NUM >= PG_VERSION_14
bool process_toast = false;
#endif
/* Set default value */
params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
@ -547,16 +543,12 @@ VacuumStmtParams(VacuumStmt *vacstmt)
{
disable_page_skipping = defGetBoolean(opt);
}
#if PG_VERSION_NUM >= PG_VERSION_14
else if (strcmp(opt->defname, "process_toast") == 0)
{
process_toast = defGetBoolean(opt);
}
#endif
else if (strcmp(opt->defname, "index_cleanup") == 0)
{
#if PG_VERSION_NUM >= PG_VERSION_14
/* Interpret no string as the default, which is 'auto' */
if (!opt->arg)
{
@ -577,10 +569,6 @@ VacuumStmtParams(VacuumStmt *vacstmt)
VACOPTVALUE_DISABLED;
}
}
#else
params.index_cleanup = defGetBoolean(opt) ? VACOPTVALUE_ENABLED :
VACOPTVALUE_DISABLED;
#endif
}
else if (strcmp(opt->defname, "truncate") == 0)
{
@ -625,9 +613,7 @@ VacuumStmtParams(VacuumStmt *vacstmt)
(analyze ? VACOPT_ANALYZE : 0) |
(freeze ? VACOPT_FREEZE : 0) |
(full ? VACOPT_FULL : 0) |
#if PG_VERSION_NUM >= PG_VERSION_14
(process_toast ? VACOPT_PROCESS_TOAST : 0) |
#endif
(disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0);
return params;
}
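For illustration, a VACUUM command using the options that VacuumStmtParams and DeparseVacuumStmtPrefix now handle without version guards (the table name is made up):
VACUUM (PROCESS_TOAST, INDEX_CLEANUP auto, SKIP_LOCKED) distributed_orders;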

View File

@ -89,7 +89,7 @@ PostprocessVariableSetStmt(VariableSetStmt *setStmt, const char *setStmtString)
List *connectionList = NIL;
/* at present we only support SET LOCAL and SET TRANSACTION */
AssertArg(ShouldPropagateSetCommand(setStmt));
Assert(ShouldPropagateSetCommand(setStmt));
/* haven't seen any SET stmts so far in this (sub-)xact: initialize StringInfo */
if (activeSetStmts == NULL)

View File

@ -598,7 +598,7 @@ List *
PostprocessAlterViewStmt(Node *node, const char *queryString)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_VIEW);
Assert(stmt->objtype == OBJECT_VIEW);
List *viewAddresses = GetObjectAddressListFromParseTree((Node *) stmt, true, true);

View File

@ -24,7 +24,19 @@
#include "utils/builtins.h"
/* stores the string representation of our node connection GUC */
char *NodeConninfo = "";
#ifdef USE_SSL
char *NodeConninfo = "sslmode=require";
#else
char *NodeConninfo = "sslmode=prefer";
#endif
/*
* Previously we used an empty initial value for NodeConninfo.
* PG16, however, requires configuration parameters to have matching initial and boot values.
* Therefore we now use this flag in NodeConninfoGucAssignHook.
*/
bool checkAtBootPassed = false;
char *LocalHostName = "localhost";
/* represents a list of libpq parameter settings */
@ -183,7 +195,7 @@ CheckConninfo(const char *conninfo, const char **allowedConninfoKeywords,
const char *prev = allowedConninfoKeywords[keywordIdx - 1];
const char *curr = allowedConninfoKeywords[keywordIdx];
AssertArg(strcmp(prev, curr) < 0);
Assert(strcmp(prev, curr) < 0);
}
#endif
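A minimal usage sketch for the setting that NodeConninfo backs, assuming the citus.node_conninfo GUC name; the value mirrors the new SSL-dependent default above:
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=require';
SELECT pg_reload_conf();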

View File

@ -1314,33 +1314,6 @@ StartConnectionEstablishment(MultiConnection *connection, ConnectionHashKey *key
}
#if PG_VERSION_NUM < 140000
/*
* WarmUpConnParamsHash warms up the ConnParamsHash by loading all the
* conn params for active primary nodes.
*/
void
WarmUpConnParamsHash(void)
{
List *workerNodeList = ActivePrimaryNodeList(AccessShareLock);
WorkerNode *workerNode = NULL;
foreach_ptr(workerNode, workerNodeList)
{
ConnectionHashKey key;
strlcpy(key.hostname, workerNode->workerName, MAX_NODE_LENGTH);
key.port = workerNode->workerPort;
strlcpy(key.database, CurrentDatabaseName(), NAMEDATALEN);
strlcpy(key.user, CurrentUserName(), NAMEDATALEN);
key.replicationConnParam = false;
FindOrCreateConnParamsEntry(&key);
}
}
#endif
/*
* FindOrCreateConnParamsEntry searches ConnParamsHash for the given key,
* if it is not found, it is created.

View File

@ -958,7 +958,7 @@ ResetShardPlacementAssociation(struct MultiConnection *connection)
/*
* ResetPlacementConnectionManagement() disassociates connections from
* ResetPlacementConnectionManagement() dissociates connections from
* placements and shards. This will be called at the end of XACT_EVENT_COMMIT
* and XACT_EVENT_ABORT.
*/

View File

@ -22,9 +22,7 @@
#include "access/skey.h"
#include "access/stratnum.h"
#include "access/sysattr.h"
#if PG_VERSION_NUM >= PG_VERSION_14
#include "access/toast_compression.h"
#endif
#include "access/tupdesc.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
@ -386,13 +384,11 @@ pg_get_tableschemadef_string(Oid tableRelationId, IncludeSequenceDefaults
atttypmod);
appendStringInfoString(&buffer, attributeTypeName);
#if PG_VERSION_NUM >= PG_VERSION_14
if (CompressionMethodIsValid(attributeForm->attcompression))
{
appendStringInfo(&buffer, " COMPRESSION %s",
GetCompressionMethodName(attributeForm->attcompression));
}
#endif
if (attributeForm->attidentity && includeIdentityDefaults)
{
@ -939,17 +935,6 @@ deparse_shard_reindex_statement(ReindexStmt *origStmt, Oid distrelid, int64 shar
bool
IsReindexWithParam_compat(ReindexStmt *reindexStmt, char *param)
{
#if PG_VERSION_NUM < PG_VERSION_14
if (strcmp(param, "concurrently") == 0)
{
return reindexStmt->concurrent;
}
else if (strcmp(param, "verbose") == 0)
{
return reindexStmt->options & REINDEXOPT_VERBOSE;
}
return false;
#else
DefElem *opt = NULL;
foreach_ptr(opt, reindexStmt->params)
{
@ -959,7 +944,6 @@ IsReindexWithParam_compat(ReindexStmt *reindexStmt, char *param)
}
}
return false;
#endif
}
@ -974,7 +958,7 @@ AddVacuumParams(ReindexStmt *reindexStmt, StringInfo buffer)
{
appendStringInfoString(temp, "VERBOSE");
}
#if PG_VERSION_NUM >= PG_VERSION_14
char *tableSpaceName = NULL;
DefElem *opt = NULL;
foreach_ptr(opt, reindexStmt->params)
@ -997,7 +981,6 @@ AddVacuumParams(ReindexStmt *reindexStmt, StringInfo buffer)
appendStringInfo(temp, "TABLESPACE %s", tableSpaceName);
}
}
#endif
if (temp->len > 0)
{
@ -1627,9 +1610,7 @@ RoleSpecString(RoleSpec *spec, bool withQuoteIdentifier)
spec->rolename;
}
#if PG_VERSION_NUM >= PG_VERSION_14
case ROLESPEC_CURRENT_ROLE:
#endif
case ROLESPEC_CURRENT_USER:
{
return withQuoteIdentifier ?

View File

@ -24,6 +24,7 @@ static void AppendDropSchemaStmt(StringInfo buf, DropStmt *stmt);
static void AppendGrantOnSchemaStmt(StringInfo buf, GrantStmt *stmt);
static void AppendGrantOnSchemaSchemas(StringInfo buf, GrantStmt *stmt);
static void AppendAlterSchemaRenameStmt(StringInfo buf, RenameStmt *stmt);
static void AppendAlterSchemaOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt);
char *
DeparseCreateSchemaStmt(Node *node)
@ -68,6 +69,31 @@ DeparseGrantOnSchemaStmt(Node *node)
}
char *
DeparseAlterSchemaOwnerStmt(Node *node)
{
AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node);
StringInfoData str = { 0 };
initStringInfo(&str);
AppendAlterSchemaOwnerStmt(&str, stmt);
return str.data;
}
static void
AppendAlterSchemaOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt)
{
Assert(stmt->objectType == OBJECT_SCHEMA);
appendStringInfo(buf, "ALTER SCHEMA %s OWNER TO %s;",
quote_identifier(strVal(stmt->object)),
RoleSpecString(stmt->newowner, true));
}
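The new DeparseAlterSchemaOwnerStmt produces statements of this shape (schema and role names are made up for illustration):
ALTER SCHEMA sales OWNER TO sales_admin;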
char *
DeparseAlterSchemaRenameStmt(Node *node)
{

View File

@ -193,7 +193,7 @@ DeparseAlterSequenceOwnerStmt(Node *node)
StringInfoData str = { 0 };
initStringInfo(&str);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
Assert(stmt->objtype == OBJECT_SEQUENCE);
AppendAlterSequenceOwnerStmt(&str, stmt);
@ -208,7 +208,7 @@ DeparseAlterSequenceOwnerStmt(Node *node)
static void
AppendAlterSequenceOwnerStmt(StringInfo buf, AlterTableStmt *stmt)
{
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
Assert(stmt->objtype == OBJECT_SEQUENCE);
RangeVar *seq = stmt->relation;
char *qualifiedSequenceName = quote_qualified_identifier(seq->schemaname,
seq->relname);
@ -274,7 +274,7 @@ DeparseAlterSequencePersistenceStmt(Node *node)
StringInfoData str = { 0 };
initStringInfo(&str);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
Assert(stmt->objtype == OBJECT_SEQUENCE);
AppendAlterSequencePersistenceStmt(&str, stmt);
@ -289,7 +289,7 @@ DeparseAlterSequencePersistenceStmt(Node *node)
static void
AppendAlterSequencePersistenceStmt(StringInfo buf, AlterTableStmt *stmt)
{
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
Assert(stmt->objtype == OBJECT_SEQUENCE);
RangeVar *seq = stmt->relation;
char *qualifiedSequenceName = quote_qualified_identifier(seq->schemaname,

View File

@ -229,7 +229,6 @@ AppendStatTypes(StringInfo buf, CreateStatsStmt *stmt)
}
#if PG_VERSION_NUM >= PG_VERSION_14
static void
AppendColumnNames(StringInfo buf, CreateStatsStmt *stmt)
{
@ -257,36 +256,6 @@ AppendColumnNames(StringInfo buf, CreateStatsStmt *stmt)
}
#else
static void
AppendColumnNames(StringInfo buf, CreateStatsStmt *stmt)
{
ColumnRef *column = NULL;
foreach_ptr(column, stmt->exprs)
{
if (!IsA(column, ColumnRef) || list_length(column->fields) != 1)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg(
"only simple column references are allowed in CREATE STATISTICS")));
}
char *columnName = NameListToQuotedString(column->fields);
appendStringInfoString(buf, columnName);
if (column != llast(stmt->exprs))
{
appendStringInfoString(buf, ", ");
}
}
}
#endif
static void
AppendTableName(StringInfo buf, CreateStatsStmt *stmt)
{

View File

@ -31,6 +31,8 @@ static void AppendAlterTableStmt(StringInfo buf, AlterTableStmt *stmt);
static void AppendAlterTableCmd(StringInfo buf, AlterTableCmd *alterTableCmd,
AlterTableStmt *stmt);
static void AppendAlterTableCmdAddColumn(StringInfo buf, AlterTableCmd *alterTableCmd);
static void AppendAlterTableCmdDropConstraint(StringInfo buf,
AlterTableCmd *alterTableCmd);
char *
DeparseAlterTableSchemaStmt(Node *node)
@ -75,7 +77,7 @@ DeparseAlterTableStmt(Node *node)
StringInfoData str = { 0 };
initStringInfo(&str);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_TABLE);
Assert(stmt->objtype == OBJECT_TABLE);
AppendAlterTableStmt(&str, stmt);
return str.data;
@ -94,7 +96,7 @@ AppendAlterTableStmt(StringInfo buf, AlterTableStmt *stmt)
stmt->relation->relname);
ListCell *cmdCell = NULL;
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_TABLE);
Assert(stmt->objtype == OBJECT_TABLE);
appendStringInfo(buf, "ALTER TABLE %s", identifier);
foreach(cmdCell, stmt->cmds)
@ -279,7 +281,9 @@ AppendAlterTableCmdAddConstraint(StringInfo buf, Constraint *constraint,
appendStringInfoString(buf, " REFERENCES");
appendStringInfo(buf, " %s", quote_identifier(constraint->pktable->relname));
appendStringInfo(buf, " %s", quote_qualified_identifier(
constraint->pktable->schemaname,
constraint->pktable->relname));
if (list_length(constraint->pk_attrs) > 0)
{
@ -409,6 +413,12 @@ AppendAlterTableCmd(StringInfo buf, AlterTableCmd *alterTableCmd, AlterTableStmt
break;
}
case AT_DropConstraint:
{
AppendAlterTableCmdDropConstraint(buf, alterTableCmd);
break;
}
case AT_AddConstraint:
{
Constraint *constraint = (Constraint *) alterTableCmd->def;
@ -486,3 +496,27 @@ AppendAlterTableCmdAddColumn(StringInfo buf, AlterTableCmd *alterTableCmd)
appendStringInfo(buf, " COLLATE %s", identifier);
}
}
/*
* AppendAlterTableCmdDropConstraint builds and appends to the given buffer an
* AT_DropConstraint command from given AlterTableCmd object in the form
* DROP CONSTRAINT ...
*/
static void
AppendAlterTableCmdDropConstraint(StringInfo buf, AlterTableCmd *alterTableCmd)
{
appendStringInfoString(buf, " DROP CONSTRAINT");
if (alterTableCmd->missing_ok)
{
appendStringInfoString(buf, " IF EXISTS");
}
appendStringInfo(buf, " %s", quote_identifier(alterTableCmd->name));
if (alterTableCmd->behavior == DROP_CASCADE)
{
appendStringInfoString(buf, " CASCADE");
}
}
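An example of a command the new AppendAlterTableCmdDropConstraint can deparse; identifiers are made up, and IF EXISTS and CASCADE appear only when missing_ok and DROP_CASCADE are set:
ALTER TABLE orders DROP CONSTRAINT IF EXISTS orders_customer_fk CASCADE;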

View File

@ -122,7 +122,7 @@ DeparseAlterTypeStmt(Node *node)
StringInfoData str = { 0 };
initStringInfo(&str);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_TYPE);
Assert(stmt->objtype == OBJECT_TYPE);
AppendAlterTypeStmt(&str, stmt);
@ -137,7 +137,7 @@ AppendAlterTypeStmt(StringInfo buf, AlterTableStmt *stmt)
stmt->relation->relname);
ListCell *cmdCell = NULL;
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_TYPE);
Assert(stmt->objtype == OBJECT_TYPE);
appendStringInfo(buf, "ALTER TYPE %s", identifier);
foreach(cmdCell, stmt->cmds)

View File

@ -245,11 +245,7 @@ QualifyCollate(CollateClause *collClause, bool missing_ok)
List *objName = NIL;
List *objArgs = NIL;
#if PG_VERSION_NUM >= PG_VERSION_14
getObjectIdentityParts(&collationAddress, &objName, &objArgs, false);
#else
getObjectIdentityParts(&collationAddress, &objName, &objArgs);
#endif
collClause->collname = NIL;
char *name = NULL;

View File

@ -34,7 +34,7 @@ void
QualifyAlterSequenceOwnerStmt(Node *node)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
Assert(stmt->objtype == OBJECT_SEQUENCE);
RangeVar *seq = stmt->relation;
@ -62,7 +62,7 @@ void
QualifyAlterSequencePersistenceStmt(Node *node)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE);
Assert(stmt->objtype == OBJECT_SEQUENCE);
RangeVar *seq = stmt->relation;

View File

@ -123,7 +123,7 @@ void
QualifyAlterTypeStmt(Node *node)
{
AlterTableStmt *stmt = castNode(AlterTableStmt, node);
Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_TYPE);
Assert(stmt->objtype == OBJECT_TYPE);
if (stmt->relation->schemaname == NULL)
{

File diff suppressed because it is too large

View File

@ -5470,27 +5470,6 @@ get_rule_expr(Node *node, deparse_context *context,
case T_RelabelType:
{
RelabelType *relabel = (RelabelType *) node;
/*
* This is a Citus specific modification
* The planner converts CollateExpr to RelabelType
* and here we convert back.
*/
if (relabel->resultcollid != InvalidOid)
{
CollateExpr *collate = RelabelTypeToCollateExpr(relabel);
Node *arg = (Node *) collate->arg;
if (!PRETTY_PAREN(context))
appendStringInfoChar(buf, '(');
get_rule_expr_paren(arg, context, showimplicit, node);
appendStringInfo(buf, " COLLATE %s",
generate_collation_name(collate->collOid));
if (!PRETTY_PAREN(context))
appendStringInfoChar(buf, ')');
}
else
{
Node *arg = (Node *) relabel->arg;
if (relabel->relabelformat == COERCE_IMPLICIT_CAST &&
@ -5507,7 +5486,6 @@ get_rule_expr(Node *node, deparse_context *context,
node);
}
}
}
break;
case T_CoerceViaIO:

View File

@ -5696,27 +5696,6 @@ get_rule_expr(Node *node, deparse_context *context,
case T_RelabelType:
{
RelabelType *relabel = (RelabelType *) node;
/*
* This is a Citus specific modification
* The planner converts CollateExpr to RelabelType
* and here we convert back.
*/
if (relabel->resultcollid != InvalidOid)
{
CollateExpr *collate = RelabelTypeToCollateExpr(relabel);
Node *arg = (Node *) collate->arg;
if (!PRETTY_PAREN(context))
appendStringInfoChar(buf, '(');
get_rule_expr_paren(arg, context, showimplicit, node);
appendStringInfo(buf, " COLLATE %s",
generate_collation_name(collate->collOid));
if (!PRETTY_PAREN(context))
appendStringInfoChar(buf, ')');
}
else
{
Node *arg = (Node *) relabel->arg;
if (relabel->relabelformat == COERCE_IMPLICIT_CAST &&
@ -5733,7 +5712,6 @@ get_rule_expr(Node *node, deparse_context *context,
node);
}
}
}
break;
case T_CoerceViaIO:

View File

@ -496,11 +496,7 @@ struct TaskPlacementExecution;
/* GUC, determining whether Citus opens 1 connection per task */
bool ForceMaxQueryParallelization = false;
int MaxAdaptiveExecutorPoolSize = 16;
#if PG_VERSION_NUM >= PG_VERSION_14
bool EnableBinaryProtocol = true;
#else
bool EnableBinaryProtocol = false;
#endif
/* GUC, number of ms to wait between opening connections to the same worker */
int ExecutorSlowStartInterval = 10;
@ -805,6 +801,8 @@ AdaptiveExecutor(CitusScanState *scanState)
TupleDestination *defaultTupleDest =
CreateTupleStoreTupleDest(scanState->tuplestorestate, tupleDescriptor);
bool localExecutionSupported = true;
if (RequestedForExplainAnalyze(scanState))
{
/*
@ -814,6 +812,12 @@ AdaptiveExecutor(CitusScanState *scanState)
UseCoordinatedTransaction();
taskList = ExplainAnalyzeTaskList(taskList, defaultTupleDest, tupleDescriptor,
paramListInfo);
/*
* Multiple queries per task are not supported with local execution. See the Assert in
* TupleDestDestReceiverReceive.
*/
localExecutionSupported = false;
}
bool hasDependentJobs = job->dependentJobList != NIL;
@ -836,8 +840,6 @@ AdaptiveExecutor(CitusScanState *scanState)
TransactionProperties xactProperties = DecideTransactionPropertiesForTaskList(
distributedPlan->modLevel, taskList, excludeFromXact);
bool localExecutionSupported = true;
/*
* In some rare cases, we have prepared statements that pass a parameter
* and never used in the query, mark such parameters' type as Invalid(0),
@ -1009,6 +1011,32 @@ ExecuteTaskListOutsideTransaction(RowModifyLevel modLevel, List *taskList,
}
/*
* ExecuteTaskListIntoTupleDestWithParam is a proxy to ExecuteTaskListExtended() that uses
* the bind params from the executor state and defaults for some of the arguments.
*/
uint64
ExecuteTaskListIntoTupleDestWithParam(RowModifyLevel modLevel, List *taskList,
TupleDestination *tupleDest,
bool expectResults,
ParamListInfo paramListInfo)
{
int targetPoolSize = MaxAdaptiveExecutorPoolSize;
bool localExecutionSupported = true;
ExecutionParams *executionParams = CreateBasicExecutionParams(
modLevel, taskList, targetPoolSize, localExecutionSupported
);
executionParams->xactProperties = DecideTransactionPropertiesForTaskList(
modLevel, taskList, false);
executionParams->expectResults = expectResults;
executionParams->tupleDestination = tupleDest;
executionParams->paramListInfo = paramListInfo;
return ExecuteTaskListExtended(executionParams);
}
/*
* ExecuteTaskListIntoTupleDest is a proxy to ExecuteTaskListExtended() with defaults
* for some of the arguments.
@ -1040,7 +1068,12 @@ ExecuteTaskListIntoTupleDest(RowModifyLevel modLevel, List *taskList,
uint64
ExecuteTaskListExtended(ExecutionParams *executionParams)
{
ParamListInfo paramListInfo = NULL;
/* if there are no tasks to execute, we can return early */
if (list_length(executionParams->taskList) == 0)
{
return 0;
}
uint64 locallyProcessedRows = 0;
TupleDestination *defaultTupleDest = executionParams->tupleDestination;
@ -1053,7 +1086,7 @@ ExecuteTaskListExtended(ExecutionParams *executionParams)
DistributedExecution *execution =
CreateDistributedExecution(
executionParams->modLevel, executionParams->taskList,
paramListInfo, executionParams->targetPoolSize,
executionParams->paramListInfo, executionParams->targetPoolSize,
defaultTupleDest, &executionParams->xactProperties,
executionParams->jobIdList, executionParams->localExecutionSupported);
@ -1105,6 +1138,7 @@ CreateBasicExecutionParams(RowModifyLevel modLevel,
executionParams->expectResults = false;
executionParams->isUtilityCommand = false;
executionParams->jobIdList = NIL;
executionParams->paramListInfo = NULL;
return executionParams;
}

View File

@ -27,6 +27,8 @@
#include "distributed/listutils.h"
#include "distributed/local_executor.h"
#include "distributed/local_plan_cache.h"
#include "distributed/merge_executor.h"
#include "distributed/merge_planner.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_server_executor.h"
#include "distributed/multi_router_planner.h"
@ -53,6 +55,7 @@ extern AllowedDistributionColumn AllowedDistributionColumnValue;
static Node * AdaptiveExecutorCreateScan(CustomScan *scan);
static Node * NonPushableInsertSelectCreateScan(CustomScan *scan);
static Node * DelayedErrorCreateScan(CustomScan *scan);
static Node * NonPushableMergeCommandCreateScan(CustomScan *scan);
/* functions that are common to different scans */
static void CitusBeginScan(CustomScanState *node, EState *estate, int eflags);
@ -88,6 +91,11 @@ CustomScanMethods DelayedErrorCustomScanMethods = {
DelayedErrorCreateScan
};
CustomScanMethods NonPushableMergeCommandCustomScanMethods = {
"Citus MERGE INTO ...",
NonPushableMergeCommandCreateScan
};
/*
* Define executor methods for the different executor types.
@ -111,6 +119,16 @@ static CustomExecMethods NonPushableInsertSelectCustomExecMethods = {
};
static CustomExecMethods NonPushableMergeCommandCustomExecMethods = {
.CustomName = "NonPushableMergeCommandScan",
.BeginCustomScan = CitusBeginScan,
.ExecCustomScan = NonPushableMergeCommandExecScan,
.EndCustomScan = CitusEndScan,
.ReScanCustomScan = CitusReScan,
.ExplainCustomScan = NonPushableMergeCommandExplainScan
};
/*
* IsCitusCustomState returns if a given PlanState node is a CitusCustomState node.
*/
@ -124,7 +142,8 @@ IsCitusCustomState(PlanState *planState)
CustomScanState *css = castNode(CustomScanState, planState);
if (css->methods == &AdaptiveExecutorCustomExecMethods ||
css->methods == &NonPushableInsertSelectCustomExecMethods)
css->methods == &NonPushableInsertSelectCustomExecMethods ||
css->methods == &NonPushableMergeCommandCustomExecMethods)
{
return true;
}
@ -142,6 +161,7 @@ RegisterCitusCustomScanMethods(void)
RegisterCustomScanMethods(&AdaptiveExecutorCustomScanMethods);
RegisterCustomScanMethods(&NonPushableInsertSelectCustomScanMethods);
RegisterCustomScanMethods(&DelayedErrorCustomScanMethods);
RegisterCustomScanMethods(&NonPushableMergeCommandCustomScanMethods);
}
@ -182,7 +202,7 @@ CitusBeginScan(CustomScanState *node, EState *estate, int eflags)
node->ss.ps.qual = ExecInitQual(node->ss.ps.plan->qual, (PlanState *) node);
DistributedPlan *distributedPlan = scanState->distributedPlan;
if (distributedPlan->insertSelectQuery != NULL)
if (distributedPlan->modifyQueryViaCoordinatorOrRepartition != NULL)
{
/*
* INSERT..SELECT via coordinator or re-partitioning are special because
@ -723,6 +743,26 @@ DelayedErrorCreateScan(CustomScan *scan)
}
/*
* NonPushableMergeCommandCreateScan creates the scan state for executing
* MERGE INTO ... into a distributed table with repartition of source rows.
*/
static Node *
NonPushableMergeCommandCreateScan(CustomScan *scan)
{
CitusScanState *scanState = palloc0(sizeof(CitusScanState));
scanState->executorType = MULTI_EXECUTOR_NON_PUSHABLE_MERGE_QUERY;
scanState->customScanState.ss.ps.type = T_CustomScanState;
scanState->distributedPlan = GetDistributedPlan(scan);
scanState->customScanState.methods = &NonPushableMergeCommandCustomExecMethods;
scanState->finishedPreScan = false;
scanState->finishedRemoteScan = false;
return (Node *) scanState;
}
/*
* CitusEndScan is used to clean up tuple store of the given custom scan state.
*/
@ -780,7 +820,19 @@ CitusEndScan(CustomScanState *node)
*/
static void
CitusReScan(CustomScanState *node)
{ }
{
if (node->ss.ps.ps_ResultTupleSlot)
{
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
}
ExecScanReScan(&node->ss);
CitusScanState *scanState = (CitusScanState *) node;
if (scanState->tuplestorestate)
{
tuplestore_rescan(scanState->tuplestorestate);
}
}
/*
@ -896,12 +948,6 @@ SetJobColocationId(Job *job)
{
uint32 jobColocationId = INVALID_COLOCATION_ID;
if (!job->partitionKeyValue)
{
/* if the Job has no shard key, nothing to do */
return;
}
List *rangeTableList = ExtractRangeTableEntryList(job->jobQuery);
ListCell *rangeTableCell = NULL;
foreach(rangeTableCell, rangeTableList)

View File

@ -610,6 +610,18 @@ QueryStringForFragmentsTransfer(NodeToNodeFragmentsTransfer *fragmentsTransfer)
StringInfo fragmentNamesArrayString = makeStringInfo();
int fragmentCount = 0;
NodePair *nodePair = &fragmentsTransfer->nodes;
uint32 sourceNodeId = nodePair->sourceNodeId;
/*
* If the placement is a dummy placement, e.g. for queries that generate
* intermediate results at the coordinator that need to be redistributed
* to worker nodes, we need the local node id.
*/
if (sourceNodeId == LOCAL_NODE_ID)
{
nodePair->sourceNodeId = GetLocalNodeId();
}
WorkerNode *sourceNode = LookupNodeByNodeIdOrError(nodePair->sourceNodeId);
appendStringInfoString(fragmentNamesArrayString, "ARRAY[");

View File

@ -20,6 +20,7 @@
#include "distributed/insert_select_planner.h"
#include "distributed/intermediate_results.h"
#include "distributed/local_executor.h"
#include "distributed/merge_planner.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_partitioning_utils.h"
#include "distributed/multi_physical_planner.h"
@ -30,6 +31,7 @@
#include "distributed/distributed_planner.h"
#include "distributed/recursive_planning.h"
#include "distributed/relation_access_tracking.h"
#include "distributed/repartition_executor.h"
#include "distributed/resource_lock.h"
#include "distributed/shardinterval_utils.h"
#include "distributed/subplan_execution.h"
@ -55,8 +57,6 @@
bool EnableRepartitionedInsertSelect = true;
static List * TwoPhaseInsertSelectTaskList(Oid targetRelationId, Query *insertSelectQuery,
char *resultIdPrefix);
static void ExecutePlanIntoRelation(Oid targetRelationId, List *insertTargetList,
PlannedStmt *selectPlan, EState *executorState);
static HTAB * ExecutePlanIntoColocatedIntermediateResults(Oid targetRelationId,
@ -64,14 +64,7 @@ static HTAB * ExecutePlanIntoColocatedIntermediateResults(Oid targetRelationId,
PlannedStmt *selectPlan,
EState *executorState,
char *intermediateResultIdPrefix);
static List * BuildColumnNameListFromTargetList(Oid targetRelationId,
List *insertTargetList);
static int PartitionColumnIndexFromColumnList(Oid relationId, List *columnNameList);
static List * RedistributedInsertSelectTaskList(Query *insertSelectQuery,
CitusTableCacheEntry *targetRelation,
List **redistributedResults,
bool useBinaryFormat);
static int PartitionColumnIndex(List *insertTargetList, Var *partitionColumn);
static void WrapTaskListForProjection(List *taskList, List *projectedTargetEntries);
@ -89,7 +82,8 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
{
EState *executorState = ScanStateGetExecutorState(scanState);
DistributedPlan *distributedPlan = scanState->distributedPlan;
Query *insertSelectQuery = copyObject(distributedPlan->insertSelectQuery);
Query *insertSelectQuery =
copyObject(distributedPlan->modifyQueryViaCoordinatorOrRepartition);
List *insertTargetList = insertSelectQuery->targetList;
RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);
RangeTblEntry *insertRte = ExtractResultRelationRTE(insertSelectQuery);
@ -99,7 +93,8 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
HTAB *shardStateHash = NULL;
Query *selectQuery = selectRte->subquery;
PlannedStmt *selectPlan = copyObject(distributedPlan->selectPlanForInsertSelect);
PlannedStmt *selectPlan =
copyObject(distributedPlan->selectPlanForModifyViaCoordinatorOrRepartition);
/*
* If we are dealing with partitioned table, we also need to lock its
@ -111,7 +106,7 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
LockPartitionRelations(targetRelationId, RowExclusiveLock);
}
if (distributedPlan->insertSelectMethod == INSERT_SELECT_REPARTITION)
if (distributedPlan->modifyWithSelectMethod == MODIFY_WITH_SELECT_REPARTITION)
{
ereport(DEBUG1, (errmsg("performing repartitioned INSERT ... SELECT")));
@ -142,9 +137,10 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
CitusTableCacheEntry *targetRelation =
GetCitusTableCacheEntry(targetRelationId);
int partitionColumnIndex =
PartitionColumnIndex(insertTargetList, targetRelation->partitionColumn);
if (partitionColumnIndex == -1)
int distributionColumnIndex =
DistributionColumnIndex(insertTargetList,
targetRelation->partitionColumn);
if (distributionColumnIndex == -1)
{
char *relationName = get_rel_name(targetRelationId);
Oid schemaOid = get_rel_namespace(targetRelationId);
@ -158,13 +154,13 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
}
TargetEntry *selectPartitionTE = list_nth(selectQuery->targetList,
partitionColumnIndex);
distributionColumnIndex);
const char *partitionColumnName = selectPartitionTE->resname ?
selectPartitionTE->resname : "(none)";
ereport(DEBUG2, (errmsg(
"partitioning SELECT query by column index %d with name %s",
partitionColumnIndex, quote_literal_cstr(
distributionColumnIndex, quote_literal_cstr(
partitionColumnName))));
/*
@ -182,7 +178,7 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
List **redistributedResults = RedistributeTaskListResults(distResultPrefix,
distSelectTaskList,
partitionColumnIndex,
distributionColumnIndex,
targetRelation,
binaryFormat);
@ -192,7 +188,7 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
* target shard. Create and execute a list of tasks of form
* INSERT INTO ... SELECT * FROM read_intermediate_results(...);
*/
List *taskList = RedistributedInsertSelectTaskList(insertSelectQuery,
List *taskList = GenerateTaskListWithRedistributedResults(insertSelectQuery,
targetRelation,
redistributedResults,
binaryFormat);
@ -235,8 +231,9 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
intermediateResultIdPrefix);
/* generate tasks for the INSERT..SELECT phase */
List *taskList = TwoPhaseInsertSelectTaskList(targetRelationId,
insertSelectQuery,
List *taskList =
GenerateTaskListWithColocatedIntermediateResults(
targetRelationId, insertSelectQuery,
intermediateResultIdPrefix);
/*
@ -298,94 +295,6 @@ NonPushableInsertSelectExecScan(CustomScanState *node)
}
/*
* TwoPhaseInsertSelectTaskList generates a list of tasks for a query that
* inserts into a target relation and selects from a set of co-located
* intermediate results.
*/
static List *
TwoPhaseInsertSelectTaskList(Oid targetRelationId, Query *insertSelectQuery,
char *resultIdPrefix)
{
List *taskList = NIL;
/*
* Make a copy of the INSERT ... SELECT. We'll repeatedly replace the
* subquery of insertResultQuery for different intermediate results and
* then deparse it.
*/
Query *insertResultQuery = copyObject(insertSelectQuery);
RangeTblEntry *insertRte = ExtractResultRelationRTE(insertResultQuery);
RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertResultQuery);
CitusTableCacheEntry *targetCacheEntry = GetCitusTableCacheEntry(targetRelationId);
int shardCount = targetCacheEntry->shardIntervalArrayLength;
uint32 taskIdIndex = 1;
uint64 jobId = INVALID_JOB_ID;
for (int shardOffset = 0; shardOffset < shardCount; shardOffset++)
{
ShardInterval *targetShardInterval =
targetCacheEntry->sortedShardIntervalArray[shardOffset];
uint64 shardId = targetShardInterval->shardId;
List *columnAliasList = NIL;
StringInfo queryString = makeStringInfo();
StringInfo resultId = makeStringInfo();
/* during COPY, the shard ID is appended to the result name */
appendStringInfo(resultId, "%s_" UINT64_FORMAT, resultIdPrefix, shardId);
/* generate the query on the intermediate result */
Query *resultSelectQuery = BuildSubPlanResultQuery(insertSelectQuery->targetList,
columnAliasList,
resultId->data);
/* put the intermediate result query in the INSERT..SELECT */
selectRte->subquery = resultSelectQuery;
/* setting an alias simplifies deparsing of RETURNING */
if (insertRte->alias == NULL)
{
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
insertRte->alias = alias;
}
/*
* Generate a query string for the query that inserts into a shard and reads
* from an intermediate result.
*
* Since CTEs have already been converted to intermediate results, they need
* to be removed from the query. Otherwise, worker queries include both
* intermediate results and CTEs in the query.
*/
insertResultQuery->cteList = NIL;
deparse_shard_query(insertResultQuery, targetRelationId, shardId, queryString);
ereport(DEBUG2, (errmsg("distributed statement: %s", queryString->data)));
LockShardDistributionMetadata(shardId, ShareLock);
List *insertShardPlacementList = ActiveShardPlacementList(shardId);
RelationShard *relationShard = CitusMakeNode(RelationShard);
relationShard->relationId = targetShardInterval->relationId;
relationShard->shardId = targetShardInterval->shardId;
Task *modifyTask = CreateBasicTask(jobId, taskIdIndex, MODIFY_TASK,
queryString->data);
modifyTask->dependentTaskList = NIL;
modifyTask->anchorShardId = shardId;
modifyTask->taskPlacementList = insertShardPlacementList;
modifyTask->relationShardList = list_make1(relationShard);
modifyTask->replicationModel = targetCacheEntry->replicationModel;
taskList = lappend(taskList, modifyTask);
taskIdIndex++;
}
return taskList;
}
/*
* ExecutePlanIntoColocatedIntermediateResults executes the given PlannedStmt
* and inserts tuples into a set of intermediate results that are colocated with
@ -464,7 +373,7 @@ ExecutePlanIntoRelation(Oid targetRelationId, List *insertTargetList,
* BuildColumnNameListFromTargetList builds the column name list given the insert
* target list.
*/
static List *
List *
BuildColumnNameListFromTargetList(Oid targetRelationId, List *insertTargetList)
{
List *columnNameList = NIL;
@ -510,136 +419,17 @@ PartitionColumnIndexFromColumnList(Oid relationId, List *columnNameList)
/*
* IsSupportedRedistributionTarget determines whether re-partitioning into the
* given target relation is supported.
*/
bool
IsSupportedRedistributionTarget(Oid targetRelationId)
{
CitusTableCacheEntry *tableEntry = GetCitusTableCacheEntry(targetRelationId);
if (!IsCitusTableTypeCacheEntry(tableEntry, HASH_DISTRIBUTED) &&
!IsCitusTableTypeCacheEntry(tableEntry, RANGE_DISTRIBUTED))
{
return false;
}
return true;
}
/*
* RedistributedInsertSelectTaskList returns a task list to insert given
* redistributedResults into the given target relation.
* redistributedResults[shardIndex] is list of cstrings each of which is
* a result name which should be inserted into
* targetRelation->sortedShardIntervalArray[shardIndex].
*/
static List *
RedistributedInsertSelectTaskList(Query *insertSelectQuery,
CitusTableCacheEntry *targetRelation,
List **redistributedResults,
bool useBinaryFormat)
{
List *taskList = NIL;
/*
* Make a copy of the INSERT ... SELECT. We'll repeatedly replace the
* subquery of insertResultQuery for different intermediate results and
* then deparse it.
*/
Query *insertResultQuery = copyObject(insertSelectQuery);
RangeTblEntry *insertRte = ExtractResultRelationRTE(insertResultQuery);
RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertResultQuery);
List *selectTargetList = selectRte->subquery->targetList;
Oid targetRelationId = targetRelation->relationId;
int shardCount = targetRelation->shardIntervalArrayLength;
int shardOffset = 0;
uint32 taskIdIndex = 1;
uint64 jobId = INVALID_JOB_ID;
for (shardOffset = 0; shardOffset < shardCount; shardOffset++)
{
ShardInterval *targetShardInterval =
targetRelation->sortedShardIntervalArray[shardOffset];
List *resultIdList = redistributedResults[targetShardInterval->shardIndex];
uint64 shardId = targetShardInterval->shardId;
StringInfo queryString = makeStringInfo();
/* skip empty tasks */
if (resultIdList == NIL)
{
continue;
}
/* sort result ids for consistent test output */
List *sortedResultIds = SortList(resultIdList, pg_qsort_strcmp);
/* generate the query on the intermediate result */
Query *fragmentSetQuery = BuildReadIntermediateResultsArrayQuery(selectTargetList,
NIL,
sortedResultIds,
useBinaryFormat);
/* put the intermediate result query in the INSERT..SELECT */
selectRte->subquery = fragmentSetQuery;
/* setting an alias simplifies deparsing of RETURNING */
if (insertRte->alias == NULL)
{
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
insertRte->alias = alias;
}
/*
* Generate a query string for the query that inserts into a shard and reads
* from an intermediate result.
*
* Since CTEs have already been converted to intermediate results, they need
* to be removed from the query. Otherwise, worker queries include both
* intermediate results and CTEs in the query.
*/
insertResultQuery->cteList = NIL;
deparse_shard_query(insertResultQuery, targetRelationId, shardId, queryString);
ereport(DEBUG2, (errmsg("distributed statement: %s", queryString->data)));
LockShardDistributionMetadata(shardId, ShareLock);
List *insertShardPlacementList = ActiveShardPlacementList(shardId);
RelationShard *relationShard = CitusMakeNode(RelationShard);
relationShard->relationId = targetShardInterval->relationId;
relationShard->shardId = targetShardInterval->shardId;
Task *modifyTask = CreateBasicTask(jobId, taskIdIndex, MODIFY_TASK,
queryString->data);
modifyTask->dependentTaskList = NIL;
modifyTask->anchorShardId = shardId;
modifyTask->taskPlacementList = insertShardPlacementList;
modifyTask->relationShardList = list_make1(relationShard);
modifyTask->replicationModel = targetRelation->replicationModel;
taskList = lappend(taskList, modifyTask);
taskIdIndex++;
}
return taskList;
}
/*
* PartitionColumnIndex finds the index of given partition column in the
* DistributionColumnIndex finds the index of given distribution column in the
* given target list.
*/
static int
PartitionColumnIndex(List *insertTargetList, Var *partitionColumn)
int
DistributionColumnIndex(List *insertTargetList, Var *distributionColumn)
{
TargetEntry *insertTargetEntry = NULL;
int targetEntryIndex = 0;
foreach_ptr(insertTargetEntry, insertTargetList)
{
if (insertTargetEntry->resno == partitionColumn->varattno)
if (insertTargetEntry->resno == distributionColumn->varattno)
{
return targetEntryIndex;
}
@ -651,59 +441,6 @@ PartitionColumnIndex(List *insertTargetList, Var *partitionColumn)
}
/*
* IsRedistributablePlan returns true if the given plan is a redistributable plan.
*/
bool
IsRedistributablePlan(Plan *selectPlan)
{
if (!EnableRepartitionedInsertSelect)
{
return false;
}
/* don't redistribute if query is not distributed or requires merge on coordinator */
if (!IsCitusCustomScan(selectPlan))
{
return false;
}
DistributedPlan *distSelectPlan =
GetDistributedPlan((CustomScan *) selectPlan);
Job *distSelectJob = distSelectPlan->workerJob;
List *distSelectTaskList = distSelectJob->taskList;
/*
* Don't use redistribution if only one task. This is to keep the existing
* behaviour for CTEs whose last step is a read_intermediate_result()
* call. It doesn't hurt much in other cases either.
*/
if (list_length(distSelectTaskList) <= 1)
{
return false;
}
/* don't use redistribution for repartition joins for now */
if (distSelectJob->dependentJobList != NIL)
{
return false;
}
if (distSelectPlan->combineQuery != NULL)
{
Query *combineQuery = (Query *) distSelectPlan->combineQuery;
if (contain_nextval_expression_walker((Node *) combineQuery->targetList, NULL))
{
/* nextval needs to be evaluated on the coordinator */
return false;
}
}
return true;
}
/*
* WrapTaskListForProjection wraps task query string to only select given
* projected columns. It modifies the taskList.

View File

@ -84,6 +84,7 @@
#include "distributed/commands/utility_hook.h"
#include "distributed/citus_custom_scan.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/colocation_utils.h"
#include "distributed/query_utils.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/listutils.h"
@ -129,6 +130,8 @@ static void LogLocalCommand(Task *task);
static uint64 LocallyPlanAndExecuteMultipleQueries(List *queryStrings,
TupleDestination *tupleDest,
Task *task);
static void SetColocationIdAndPartitionKeyValueForTasks(List *taskList,
Job *workerJob);
static void LocallyExecuteUtilityTask(Task *task);
static void ExecuteUdfTaskQuery(Query *localUdfCommandQuery);
static void EnsureTransitionPossible(LocalExecutionStatus from,
@ -228,6 +231,17 @@ ExecuteLocalTaskListExtended(List *taskList,
EnsureTaskExecutionAllowed(isRemote);
}
/*
* If workerJob has a partitionKeyValue, we need to set the colocation id
* and partition key value for each task before we start executing them
* because tenant stats are collected based on these task-level values.
*/
if (distributedPlan != NULL && distributedPlan->workerJob != NULL && taskList != NIL)
{
SetJobColocationId(distributedPlan->workerJob);
SetColocationIdAndPartitionKeyValueForTasks(taskList, distributedPlan->workerJob);
}
/*
* Use a new memory context that gets reset after every task to free
* the deparsed query string and query plan.
@ -367,6 +381,25 @@ ExecuteLocalTaskListExtended(List *taskList,
}
/*
* SetColocationIdAndPartitionKeyValueForTasks sets colocationId and partitionKeyValue
* for the tasks in the taskList.
*/
static void
SetColocationIdAndPartitionKeyValueForTasks(List *taskList, Job *workerJob)
{
if (workerJob->colocationId != INVALID_COLOCATION_ID)
{
Task *task = NULL;
foreach_ptr(task, taskList)
{
task->colocationId = workerJob->colocationId;
task->partitionKeyValue = workerJob->partitionKeyValue;
}
}
}
/*
* LocallyPlanAndExecuteMultipleQueries plans and executes the given query strings
* one by one.

View File

@ -0,0 +1,337 @@
/*-------------------------------------------------------------------------
*
* merge_executor.c
*
* Executor logic for MERGE SQL statement.
*
* Copyright (c) Citus Data, Inc.
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "distributed/distributed_execution_locks.h"
#include "distributed/insert_select_executor.h"
#include "distributed/intermediate_results.h"
#include "distributed/listutils.h"
#include "distributed/merge_executor.h"
#include "distributed/merge_planner.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_partitioning_utils.h"
#include "distributed/multi_router_planner.h"
#include "distributed/repartition_executor.h"
#include "distributed/subplan_execution.h"
#include "nodes/execnodes.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
static void ExecuteSourceAtWorkerAndRepartition(CitusScanState *scanState);
static void ExecuteSourceAtCoordAndRedistribution(CitusScanState *scanState);
static HTAB * ExecuteMergeSourcePlanIntoColocatedIntermediateResults(Oid targetRelationId,
Query *mergeQuery,
List *sourceTargetList,
PlannedStmt *sourcePlan,
EState *executorState,
char *intermediateResultIdPrefix,
int partitionColumnIndex);
/*
* NonPushableMergeCommandExecScan performs a MERGE INTO distributed_table
* USING (source-query) ... command. This can be done either by aggregating
* task results at the coordinator and repartitioning the results, or by
* repartitioning task results and directly transferring data between nodes.
*/
TupleTableSlot *
NonPushableMergeCommandExecScan(CustomScanState *node)
{
CitusScanState *scanState = (CitusScanState *) node;
DistributedPlan *distributedPlan = scanState->distributedPlan;
if (!scanState->finishedRemoteScan)
{
switch (distributedPlan->modifyWithSelectMethod)
{
case MODIFY_WITH_SELECT_REPARTITION:
{
ExecuteSourceAtWorkerAndRepartition(scanState);
break;
}
case MODIFY_WITH_SELECT_VIA_COORDINATOR:
{
ExecuteSourceAtCoordAndRedistribution(scanState);
break;
}
default:
{
ereport(ERROR, (errmsg("Unexpected MERGE execution method(%d)",
distributedPlan->modifyWithSelectMethod)));
}
}
scanState->finishedRemoteScan = true;
}
TupleTableSlot *resultSlot = ReturnTupleFromTuplestore(scanState);
return resultSlot;
}
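For orientation, here is a hedged sketch (all relation and column names are invented) of the command shape that reaches this custom scan; whether a given MERGE actually takes the repartition or the via-coordinator path is decided by the planner, not by this code:
/*
 * Hypothetical example of a non-pushable MERGE handled by
 * NonPushableMergeCommandExecScan():
 *
 *   MERGE INTO target_dist t
 *   USING (SELECT id, sum(val) AS val FROM source_dist GROUP BY id) s
 *   ON t.id = s.id
 *   WHEN MATCHED THEN UPDATE SET val = s.val
 *   WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);
 *
 * The planner records its choice in distributedPlan->modifyWithSelectMethod,
 * which the switch above dispatches on.
 */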
/*
* ExecuteSourceAtWorkerAndRepartition Executes the Citus distributed plan, including any
* sub-plans, and captures the results in intermediate files. Subsequently, redistributes
* the result files to ensure colocation with the target, and directs the MERGE SQL
* operation to the target shards on the worker nodes, utilizing the colocated
* intermediate files as the data source.
*/
static void
ExecuteSourceAtWorkerAndRepartition(CitusScanState *scanState)
{
DistributedPlan *distributedPlan = scanState->distributedPlan;
Query *mergeQuery =
copyObject(distributedPlan->modifyQueryViaCoordinatorOrRepartition);
RangeTblEntry *targetRte = ExtractResultRelationRTE(mergeQuery);
RangeTblEntry *sourceRte = ExtractMergeSourceRangeTableEntry(mergeQuery);
Oid targetRelationId = targetRte->relid;
bool hasReturning = distributedPlan->expectResults;
Query *sourceQuery = sourceRte->subquery;
PlannedStmt *sourcePlan =
copyObject(distributedPlan->selectPlanForModifyViaCoordinatorOrRepartition);
EState *executorState = ScanStateGetExecutorState(scanState);
/*
* If we are dealing with partitioned table, we also need to lock its
* partitions. Here we only lock targetRelation, we acquire necessary
* locks on source tables during execution of those source queries.
*/
if (PartitionedTable(targetRelationId))
{
LockPartitionRelations(targetRelationId, RowExclusiveLock);
}
bool randomAccess = true;
bool interTransactions = false;
DistributedPlan *distSourcePlan =
GetDistributedPlan((CustomScan *) sourcePlan->planTree);
Job *distSourceJob = distSourcePlan->workerJob;
List *distSourceTaskList = distSourceJob->taskList;
bool binaryFormat =
CanUseBinaryCopyFormatForTargetList(sourceQuery->targetList);
ereport(DEBUG1, (errmsg("Executing subplans of the source query and "
"storing the results at the respective node(s)")));
ExecuteSubPlans(distSourcePlan);
/*
* We have a separate directory for each transaction, so choosing
* the same result prefix won't cause filename conflicts. Results
* directory name also includes node id and database id, so we don't
* need to include them in the filename. We include job id here for
* the case "MERGE USING <source query>" is executed recursively.
*/
StringInfo distResultPrefixString = makeStringInfo();
appendStringInfo(distResultPrefixString,
"repartitioned_results_" UINT64_FORMAT,
distSourceJob->jobId);
char *distResultPrefix = distResultPrefixString->data;
CitusTableCacheEntry *targetRelation = GetCitusTableCacheEntry(targetRelationId);
ereport(DEBUG1, (errmsg("Redistributing source result rows across nodes")));
/*
* partitionColumnIndex determines the column in the selectTaskList to
* use for (re)partitioning of the source result, which will colocate
* the result data with the target.
*/
int partitionColumnIndex = distributedPlan->sourceResultRepartitionColumnIndex;
/*
* Below call partitions the results using shard ranges and partition method of
* targetRelation, and then colocates the result files with shards. These
* transfers are done by calls to fetch_intermediate_results() between nodes.
*/
List **redistributedResults =
RedistributeTaskListResults(distResultPrefix,
distSourceTaskList, partitionColumnIndex,
targetRelation, binaryFormat);
ereport(DEBUG1, (errmsg("Executing final MERGE on workers using "
"intermediate results")));
/*
* At this point source query has been executed on workers and results
* have been fetched in such a way that they are colocated with corresponding
* target shard(s). Create and execute a list of tasks of form
* MERGE INTO ... USING SELECT * FROM read_intermediate_results(...);
*/
List *taskList =
GenerateTaskListWithRedistributedResults(mergeQuery,
targetRelation,
redistributedResults,
binaryFormat);
scanState->tuplestorestate =
tuplestore_begin_heap(randomAccess, interTransactions, work_mem);
ParamListInfo paramListInfo = executorState->es_param_list_info;
TupleDesc tupleDescriptor = ScanStateGetTupleDescriptor(scanState);
TupleDestination *tupleDest =
CreateTupleStoreTupleDest(scanState->tuplestorestate,
tupleDescriptor);
uint64 rowsMerged =
ExecuteTaskListIntoTupleDestWithParam(ROW_MODIFY_NONCOMMUTATIVE, taskList,
tupleDest,
hasReturning,
paramListInfo);
executorState->es_processed = rowsMerged;
}
/*
* ExecuteSourceAtCoordAndRedistribution Executes the plan that necessitates evaluation
* at the coordinator and redistributes the resulting rows to intermediate files,
* ensuring colocation with the target shards. Directs the MERGE SQL operation to the
* target shards on the worker nodes, utilizing the colocated intermediate files as the
* data source.
*/
static void
ExecuteSourceAtCoordAndRedistribution(CitusScanState *scanState)
{
EState *executorState = ScanStateGetExecutorState(scanState);
DistributedPlan *distributedPlan = scanState->distributedPlan;
Query *mergeQuery =
copyObject(distributedPlan->modifyQueryViaCoordinatorOrRepartition);
RangeTblEntry *targetRte = ExtractResultRelationRTE(mergeQuery);
RangeTblEntry *sourceRte = ExtractMergeSourceRangeTableEntry(mergeQuery);
Query *sourceQuery = sourceRte->subquery;
Oid targetRelationId = targetRte->relid;
PlannedStmt *sourcePlan =
copyObject(distributedPlan->selectPlanForModifyViaCoordinatorOrRepartition);
char *intermediateResultIdPrefix = distributedPlan->intermediateResultIdPrefix;
bool hasReturning = distributedPlan->expectResults;
int partitionColumnIndex = distributedPlan->sourceResultRepartitionColumnIndex;
/*
* If we are dealing with partitioned table, we also need to lock its
* partitions. Here we only lock targetRelation, we acquire necessary
* locks on source tables during execution of those source queries.
*/
if (PartitionedTable(targetRelationId))
{
LockPartitionRelations(targetRelationId, RowExclusiveLock);
}
ereport(DEBUG1, (errmsg("Collect source query results on coordinator")));
List *prunedTaskList = NIL;
HTAB *shardStateHash =
ExecuteMergeSourcePlanIntoColocatedIntermediateResults(
targetRelationId,
mergeQuery,
sourceQuery->targetList,
sourcePlan,
executorState,
intermediateResultIdPrefix,
partitionColumnIndex);
ereport(DEBUG1, (errmsg("Create a MERGE task list that needs to be routed")));
/* generate tasks for the .. phase */
List *taskList =
GenerateTaskListWithColocatedIntermediateResults(targetRelationId, mergeQuery,
intermediateResultIdPrefix);
/*
* We cannot actually execute MERGE INTO ... tasks that read from
* intermediate results that weren't created because no rows were
* written to them. Prune those tasks out by only including tasks
* on shards with connections.
*/
Task *task = NULL;
foreach_ptr(task, taskList)
{
uint64 shardId = task->anchorShardId;
bool shardModified = false;
hash_search(shardStateHash, &shardId, HASH_FIND, &shardModified);
if (shardModified)
{
prunedTaskList = lappend(prunedTaskList, task);
}
}
if (prunedTaskList == NIL)
{
/* No task to execute */
return;
}
ereport(DEBUG1, (errmsg("Execute MERGE task list")));
bool randomAccess = true;
bool interTransactions = false;
Assert(scanState->tuplestorestate == NULL);
scanState->tuplestorestate = tuplestore_begin_heap(randomAccess, interTransactions,
work_mem);
TupleDesc tupleDescriptor = ScanStateGetTupleDescriptor(scanState);
ParamListInfo paramListInfo = executorState->es_param_list_info;
TupleDestination *tupleDest =
CreateTupleStoreTupleDest(scanState->tuplestorestate, tupleDescriptor);
uint64 rowsMerged =
ExecuteTaskListIntoTupleDestWithParam(ROW_MODIFY_NONCOMMUTATIVE,
prunedTaskList,
tupleDest,
hasReturning,
paramListInfo);
executorState->es_processed = rowsMerged;
}
/*
* ExecuteMergeSourcePlanIntoColocatedIntermediateResults Executes the given PlannedStmt
* and inserts tuples into a set of intermediate results that are colocated with the
* target table for further processing of the MERGE INTO. It also returns the hash of
* shard states that were used to insert tuples into the target relation.
*/
static HTAB *
ExecuteMergeSourcePlanIntoColocatedIntermediateResults(Oid targetRelationId,
Query *mergeQuery,
List *sourceTargetList,
PlannedStmt *sourcePlan,
EState *executorState,
char *intermediateResultIdPrefix,
int partitionColumnIndex)
{
ParamListInfo paramListInfo = executorState->es_param_list_info;
/* Get column name list and partition column index for the target table */
List *columnNameList =
BuildColumnNameListFromTargetList(targetRelationId, sourceTargetList);
/* set up a DestReceiver that copies into the intermediate file */
const bool publishableData = false;
CitusCopyDestReceiver *copyDest = CreateCitusCopyDestReceiver(targetRelationId,
columnNameList,
partitionColumnIndex,
executorState,
intermediateResultIdPrefix,
publishableData);
/* We can skip coercions when writing to intermediate files */
copyDest->skipCoercions = true;
ExecutePlanIntoDestReceiver(sourcePlan, paramListInfo, (DestReceiver *) copyDest);
executorState->es_processed = copyDest->tuplesSent;
XactModificationLevel = XACT_MODIFICATION_DATA;
return copyDest->shardStateHash;
}

View File

@ -455,7 +455,7 @@ ReadFileIntoTupleStore(char *fileName, char *copyFormat, TupleDesc tupleDescript
location);
copyOptions = lappend(copyOptions, copyOption);
CopyFromState copyState = BeginCopyFrom_compat(NULL, stubRelation, NULL,
CopyFromState copyState = BeginCopyFrom(NULL, stubRelation, NULL,
fileName, false, NULL,
NULL, copyOptions);

View File

@ -24,13 +24,14 @@
#include "distributed/multi_executor.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/multi_server_executor.h"
#include "distributed/multi_router_planner.h"
#include "distributed/coordinator_protocol.h"
#include "distributed/subplan_execution.h"
#include "distributed/tuple_destination.h"
#include "distributed/worker_protocol.h"
#include "utils/lsyscache.h"
int RemoteTaskCheckInterval = 100; /* per cycle sleep interval in millisecs */
int RemoteTaskCheckInterval = 10; /* per cycle sleep interval in millisecs */
int TaskExecutorType = MULTI_EXECUTOR_ADAPTIVE; /* distributed executor type */
bool EnableRepartitionJoins = false;
@ -47,8 +48,13 @@ JobExecutorType(DistributedPlan *distributedPlan)
{
Job *job = distributedPlan->workerJob;
if (distributedPlan->insertSelectQuery != NULL)
if (distributedPlan->modifyQueryViaCoordinatorOrRepartition != NULL)
{
if (IsMergeQuery(distributedPlan->modifyQueryViaCoordinatorOrRepartition))
{
return MULTI_EXECUTOR_NON_PUSHABLE_MERGE_QUERY;
}
/*
* We go through
* MULTI_EXECUTOR_NON_PUSHABLE_INSERT_SELECT because

View File

@ -797,11 +797,7 @@ BuildExistingQueryIdHash(void)
{
const int userIdAttributeNumber = 1;
const int dbIdAttributeNumber = 2;
#if PG_VERSION_NUM >= PG_VERSION_14
const int queryIdAttributeNumber = 4;
#else
const int queryIdAttributeNumber = 3;
#endif
Datum commandTypeDatum = (Datum) 0;
bool missingOK = true;

View File

@ -0,0 +1,301 @@
/*-------------------------------------------------------------------
*
* repartition_executor.c
*
* Definitions for public functions and types related to repartition
* of select query results.
*
* Copyright (c) Citus Data, Inc.
*-------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/parsenodes.h"
#include "distributed/citus_custom_scan.h"
#include "distributed/intermediate_results.h"
#include "distributed/listutils.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/multi_router_planner.h"
#include "distributed/recursive_planning.h"
#include "distributed/repartition_executor.h"
#include "distributed/resource_lock.h"
/*
* IsSupportedRedistributionTarget determines whether re-partitioning into the
* given target relation is supported.
*/
bool
IsSupportedRedistributionTarget(Oid targetRelationId)
{
CitusTableCacheEntry *tableEntry = GetCitusTableCacheEntry(targetRelationId);
if (!IsCitusTableTypeCacheEntry(tableEntry, HASH_DISTRIBUTED) &&
!IsCitusTableTypeCacheEntry(tableEntry, RANGE_DISTRIBUTED))
{
return false;
}
return true;
}
/*
* IsRedistributablePlan returns true if the given plan is a redistributable plan.
*/
bool
IsRedistributablePlan(Plan *selectPlan)
{
if (!EnableRepartitionedInsertSelect)
{
return false;
}
/*
* Don't redistribute if query is not distributed or requires
* merge on coordinator
*/
if (!IsCitusCustomScan(selectPlan))
{
return false;
}
DistributedPlan *distSelectPlan =
GetDistributedPlan((CustomScan *) selectPlan);
Job *distSelectJob = distSelectPlan->workerJob;
List *distSelectTaskList = distSelectJob->taskList;
/*
* Don't use redistribution if only one task. This is to keep the existing
* behaviour for CTEs whose last step is a read_intermediate_result()
* call. It doesn't hurt much in other cases either.
*/
if (list_length(distSelectTaskList) <= 1)
{
return false;
}
/* don't use redistribution for repartition joins for now */
if (distSelectJob->dependentJobList != NIL)
{
return false;
}
if (distSelectPlan->combineQuery != NULL)
{
Query *combineQuery = (Query *) distSelectPlan->combineQuery;
if (contain_nextval_expression_walker((Node *) combineQuery->targetList, NULL))
{
/* nextval needs to be evaluated on the coordinator */
return false;
}
}
return true;
}
/*
* GenerateTaskListWithColocatedIntermediateResults generates a list of tasks
* for a query that inserts into a target relation and selects from a set of
* co-located intermediate results.
*/
List *
GenerateTaskListWithColocatedIntermediateResults(Oid targetRelationId,
Query *modifyQueryViaCoordinatorOrRepartition,
char *resultIdPrefix)
{
List *taskList = NIL;
/*
* Make a copy of the <MODIFY-SQL> ... SELECT. We'll repeatedly replace
* the subquery of modifyResultQuery for different intermediate results and
* then deparse it.
*/
Query *modifyWithResultQuery = copyObject(modifyQueryViaCoordinatorOrRepartition);
RangeTblEntry *insertRte = ExtractResultRelationRTE(modifyWithResultQuery);
RangeTblEntry *selectRte = ExtractSourceResultRangeTableEntry(modifyWithResultQuery);
CitusTableCacheEntry *targetCacheEntry = GetCitusTableCacheEntry(targetRelationId);
int shardCount = targetCacheEntry->shardIntervalArrayLength;
uint32 taskIdIndex = 1;
uint64 jobId = INVALID_JOB_ID;
for (int shardOffset = 0; shardOffset < shardCount; shardOffset++)
{
ShardInterval *targetShardInterval =
targetCacheEntry->sortedShardIntervalArray[shardOffset];
uint64 shardId = targetShardInterval->shardId;
List *columnAliasList = NIL;
StringInfo queryString = makeStringInfo();
StringInfo resultId = makeStringInfo();
/* during COPY, the shard ID is appended to the result name */
appendStringInfo(resultId, "%s_" UINT64_FORMAT, resultIdPrefix, shardId);
/*
* For MERGE SQL, use the USING clause list; the main query target list
* is NULL
*/
List *targetList = IsMergeQuery(modifyQueryViaCoordinatorOrRepartition) ?
selectRte->subquery->targetList :
modifyQueryViaCoordinatorOrRepartition->targetList;
/* generate the query on the intermediate result */
Query *resultSelectQuery = BuildSubPlanResultQuery(targetList,
columnAliasList,
resultId->data);
/* put the intermediate result query in the INSERT..SELECT */
selectRte->subquery = resultSelectQuery;
/* setting an alias simplifies deparsing of RETURNING */
if (insertRte->alias == NULL)
{
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
insertRte->alias = alias;
}
/*
* Generate a query string for the query that inserts into a shard and reads
* from an intermediate result.
*
* Since CTEs have already been converted to intermediate results, they need
* to be removed from the query. Otherwise, worker queries include both
* intermediate results and CTEs in the query.
*/
modifyWithResultQuery->cteList = NIL;
deparse_shard_query(modifyWithResultQuery, targetRelationId, shardId,
queryString);
ereport(DEBUG2, (errmsg("distributed statement: %s", queryString->data)));
LockShardDistributionMetadata(shardId, ShareLock);
List *insertShardPlacementList = ActiveShardPlacementList(shardId);
RelationShard *relationShard = CitusMakeNode(RelationShard);
relationShard->relationId = targetShardInterval->relationId;
relationShard->shardId = targetShardInterval->shardId;
Task *modifyTask = CreateBasicTask(jobId, taskIdIndex, MODIFY_TASK,
queryString->data);
modifyTask->dependentTaskList = NIL;
modifyTask->anchorShardId = shardId;
modifyTask->taskPlacementList = insertShardPlacementList;
modifyTask->relationShardList = list_make1(relationShard);
modifyTask->replicationModel = targetCacheEntry->replicationModel;
taskList = lappend(taskList, modifyTask);
taskIdIndex++;
}
return taskList;
}
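As a hedged illustration (prefix, shard id and column list are invented, and the exact text comes from deparse_shard_query()), each generated task reads one colocated intermediate result and writes into the matching shard, roughly:
/*
 * Illustrative sketch of a per-shard task query for an INSERT .. SELECT,
 * assuming resultIdPrefix "insert_select_42" and target shard 102008
 * (argument casts and aliases omitted):
 *
 *   INSERT INTO public.target_102008 (id, val)
 *   SELECT id, val
 *   FROM read_intermediate_result('insert_select_42_102008', 'binary')
 *        AS res (id integer, val integer);
 */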
/*
* GenerateTaskListWithRedistributedResults returns a task list to insert given
* redistributedResults into the given target relation.
* redistributedResults[shardIndex] is list of cstrings each of which is
* a result name which should be inserted into
* targetRelation->sortedShardIntervalArray[shardIndex].
*/
List *
GenerateTaskListWithRedistributedResults(Query *modifyQueryViaCoordinatorOrRepartition,
CitusTableCacheEntry *targetRelation,
List **redistributedResults,
bool useBinaryFormat)
{
List *taskList = NIL;
/*
* Make a copy of the <MODIFY-SQL> ... SELECT. We'll repeatedly replace
* the subquery of modifyResultQuery for different intermediate results and
* then deparse it.
*/
Query *modifyResultQuery = copyObject(modifyQueryViaCoordinatorOrRepartition);
RangeTblEntry *insertRte = ExtractResultRelationRTE(modifyResultQuery);
Oid targetRelationId = targetRelation->relationId;
int shardCount = targetRelation->shardIntervalArrayLength;
int shardOffset = 0;
uint32 taskIdIndex = 1;
uint64 jobId = INVALID_JOB_ID;
RangeTblEntry *selectRte =
ExtractSourceResultRangeTableEntry(modifyResultQuery);
List *selectTargetList = selectRte->subquery->targetList;
for (shardOffset = 0; shardOffset < shardCount; shardOffset++)
{
ShardInterval *targetShardInterval =
targetRelation->sortedShardIntervalArray[shardOffset];
List *resultIdList = redistributedResults[targetShardInterval->shardIndex];
uint64 shardId = targetShardInterval->shardId;
StringInfo queryString = makeStringInfo();
/* skip empty tasks */
if (resultIdList == NIL)
{
continue;
}
/* sort result ids for consistent test output */
List *sortedResultIds = SortList(resultIdList, pg_qsort_strcmp);
/* generate the query on the intermediate result */
Query *fragmentSetQuery = BuildReadIntermediateResultsArrayQuery(selectTargetList,
NIL,
sortedResultIds,
useBinaryFormat);
/* put the intermediate result query in the INSERT..SELECT */
selectRte->subquery = fragmentSetQuery;
/* setting an alias simplifies deparsing of RETURNING */
if (insertRte->alias == NULL)
{
Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
insertRte->alias = alias;
}
/*
* Generate a query string for the query that inserts into a shard and reads
* from an intermediate result.
*
* Since CTEs have already been converted to intermediate results, they need
* to be removed from the query. Otherwise, worker queries include both
* intermediate results and CTEs in the query.
*/
modifyResultQuery->cteList = NIL;
deparse_shard_query(modifyResultQuery, targetRelationId, shardId, queryString);
ereport(DEBUG2, (errmsg("distributed statement: %s", queryString->data)));
LockShardDistributionMetadata(shardId, ShareLock);
List *insertShardPlacementList = ActiveShardPlacementList(shardId);
RelationShard *relationShard = CitusMakeNode(RelationShard);
relationShard->relationId = targetShardInterval->relationId;
relationShard->shardId = targetShardInterval->shardId;
Task *modifyTask = CreateBasicTask(jobId, taskIdIndex, MODIFY_TASK,
queryString->data);
modifyTask->dependentTaskList = NIL;
modifyTask->anchorShardId = shardId;
modifyTask->taskPlacementList = insertShardPlacementList;
modifyTask->relationShardList = list_make1(relationShard);
modifyTask->replicationModel = targetRelation->replicationModel;
taskList = lappend(taskList, modifyTask);
taskIdIndex++;
}
return taskList;
}
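Analogously, a hedged sketch (result names, shard id and columns invented) of a task generated from redistributed results: it reads an array of fragment files that were colocated with the target shard, roughly of the form:
/*
 * Illustrative sketch for target shard 102008 receiving two fragments
 * (argument casts omitted):
 *
 *   INSERT INTO public.target_102008 (id, val)
 *   SELECT id, val
 *   FROM read_intermediate_results(ARRAY['repart_42_from_1_102008',
 *                                        'repart_42_from_3_102008'], 'binary')
 *        AS res (id integer, val integer);
 */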

View File

@ -896,18 +896,11 @@ DeferErrorIfHasUnsupportedDependency(const ObjectAddress *objectAddress)
return NULL;
}
char *objectDescription = NULL;
char *dependencyDescription = NULL;
StringInfo errorInfo = makeStringInfo();
StringInfo detailInfo = makeStringInfo();
#if PG_VERSION_NUM >= PG_VERSION_14
objectDescription = getObjectDescription(objectAddress, false);
dependencyDescription = getObjectDescription(undistributableDependency, false);
#else
objectDescription = getObjectDescription(objectAddress);
dependencyDescription = getObjectDescription(undistributableDependency);
#endif
char *objectDescription = getObjectDescription(objectAddress, false);
char *dependencyDescription = getObjectDescription(undistributableDependency, false);
/*
* We expect callers to interpret the error returned from this function
@ -1192,6 +1185,47 @@ IsAnyObjectAddressOwnedByExtension(const List *targets,
}
/*
* FirstExtensionWithSchema returns the first extension address whose schema is the same
* as given schema. If no extension depends on the schema, it returns NULL.
* i.e. decides whether given schema is an extension schema, as in
* `CREATE EXTENSION <ext> [WITH] SCHEMA <schema>;`
*/
ObjectAddress *
FirstExtensionWithSchema(Oid schemaId)
{
ObjectAddress *extensionAddress = NULL;
Relation relation = table_open(ExtensionRelationId, AccessShareLock);
ScanKeyData entry[1];
ScanKeyInit(&entry[0], Anum_pg_extension_extnamespace, BTEqualStrategyNumber,
F_INT4EQ, schemaId);
SysScanDesc scan = systable_beginscan(relation, InvalidOid, false, NULL, 1, entry);
HeapTuple extensionTuple = systable_getnext(scan);
if (HeapTupleIsValid(extensionTuple))
{
int extensionIdIndex = Anum_pg_extension_oid;
TupleDesc tupleDescriptor = RelationGetDescr(relation);
bool isNull = false;
Datum extensionIdDatum = heap_getattr(extensionTuple, extensionIdIndex,
tupleDescriptor, &isNull);
Oid extensionId = DatumGetObjectId(extensionIdDatum);
extensionAddress = palloc0(sizeof(ObjectAddress));
extensionAddress->objectId = extensionId;
extensionAddress->classId = ExtensionRelationId;
extensionAddress->objectSubId = 0;
}
systable_endscan(scan);
table_close(relation, AccessShareLock);
return extensionAddress;
}
/*
* IsObjectAddressOwnedByCitus returns true if the given object address
* is owned by the citus or citus_columnar extensions.

View File

@ -85,10 +85,10 @@ citus_unmark_object_distributed(PG_FUNCTION_ARGS)
{
ereport(ERROR, (errmsg("object still exists"),
errdetail("the %s \"%s\" still exists",
getObjectTypeDescription_compat(&address,
getObjectTypeDescription(&address,
/* missingOk: */ false),
getObjectIdentity_compat(&address,
getObjectIdentity(&address,
/* missingOk: */ false)),
errhint("drop the object via a DROP command")));

View File

@ -178,6 +178,7 @@ typedef struct MetadataCacheData
Oid distColocationRelationId;
Oid distColocationConfigurationIndexId;
Oid distPartitionRelationId;
Oid distTenantSchemaRelationId;
Oid distPartitionLogicalRelidIndexId;
Oid distPartitionColocationidIndexId;
Oid distShardLogicalRelidIndexId;
@ -188,6 +189,8 @@ typedef struct MetadataCacheData
Oid distPlacementGroupidIndexId;
Oid distTransactionRelationId;
Oid distTransactionGroupIndexId;
Oid distTenantSchemaPrimaryKeyIndexId;
Oid distTenantSchemaUniqueColocationIdIndexId;
Oid citusCatalogNamespaceId;
Oid copyFormatTypeId;
Oid readIntermediateResultFuncId;
@ -508,11 +511,21 @@ IsCitusTableTypeInternal(char partitionMethod, char replicationModel,
return partitionMethod == DISTRIBUTE_BY_RANGE;
}
case SINGLE_SHARD_DISTRIBUTED:
{
return partitionMethod == DISTRIBUTE_BY_NONE &&
replicationModel != REPLICATION_MODEL_2PC &&
colocationId != INVALID_COLOCATION_ID;
}
case DISTRIBUTED_TABLE:
{
return partitionMethod == DISTRIBUTE_BY_HASH ||
partitionMethod == DISTRIBUTE_BY_RANGE ||
partitionMethod == DISTRIBUTE_BY_APPEND;
partitionMethod == DISTRIBUTE_BY_APPEND ||
(partitionMethod == DISTRIBUTE_BY_NONE &&
replicationModel != REPLICATION_MODEL_2PC &&
colocationId != INVALID_COLOCATION_ID);
}
case STRICTLY_PARTITIONED_DISTRIBUTED_TABLE:
@ -815,6 +828,21 @@ IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel,
}
/*
* IsSingleShardTableByDistParams returns true if given partitionMethod,
* replicationModel and colocationId would identify a single-shard distributed
* table that has a null shard key.
*/
bool
IsSingleShardTableByDistParams(char partitionMethod, char replicationModel,
uint32 colocationId)
{
return partitionMethod == DISTRIBUTE_BY_NONE &&
replicationModel != REPLICATION_MODEL_2PC &&
colocationId != INVALID_COLOCATION_ID;
}
/*
* CitusTableList returns a list that includes all the valid distributed table
* cache entries.
@ -2818,6 +2846,39 @@ DistColocationConfigurationIndexId(void)
}
/* return oid of pg_dist_schema relation */
Oid
DistTenantSchemaRelationId(void)
{
CachedRelationLookup("pg_dist_schema",
&MetadataCache.distTenantSchemaRelationId);
return MetadataCache.distTenantSchemaRelationId;
}
/* return oid of pg_dist_schema_pkey index */
Oid
DistTenantSchemaPrimaryKeyIndexId(void)
{
CachedRelationLookup("pg_dist_schema_pkey",
&MetadataCache.distTenantSchemaPrimaryKeyIndexId);
return MetadataCache.distTenantSchemaPrimaryKeyIndexId;
}
/* return oid of pg_dist_schema_unique_colocationid_index index */
Oid
DistTenantSchemaUniqueColocationIdIndexId(void)
{
CachedRelationLookup("pg_dist_schema_unique_colocationid_index",
&MetadataCache.distTenantSchemaUniqueColocationIdIndexId);
return MetadataCache.distTenantSchemaUniqueColocationIdIndexId;
}
/* return oid of pg_dist_partition relation */
Oid
DistPartitionRelationId(void)

View File

@ -40,6 +40,7 @@
#include "distributed/backend_data.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/colocation_utils.h"
#include "distributed/tenant_schema_metadata.h"
#include "distributed/commands.h"
#include "distributed/deparser.h"
#include "distributed/distribution_column.h"
@ -60,6 +61,7 @@
#include "distributed/pg_dist_colocation.h"
#include "distributed/pg_dist_node.h"
#include "distributed/pg_dist_shard.h"
#include "distributed/pg_dist_schema.h"
#include "distributed/relation_access_tracking.h"
#include "distributed/remote_commands.h"
#include "distributed/resource_lock.h"
@ -144,6 +146,8 @@ static char * ColocationGroupCreateCommand(uint32 colocationId, int shardCount,
Oid distributionColumnType,
Oid distributionColumnCollation);
static char * ColocationGroupDeleteCommand(uint32 colocationId);
static char * RemoteSchemaIdExpressionById(Oid schemaId);
static char * RemoteSchemaIdExpressionByName(char *schemaName);
static char * RemoteTypeIdExpression(Oid typeId);
static char * RemoteCollationIdExpression(Oid colocationId);
@ -170,6 +174,8 @@ PG_FUNCTION_INFO_V1(citus_internal_update_relation_colocation);
PG_FUNCTION_INFO_V1(citus_internal_add_object_metadata);
PG_FUNCTION_INFO_V1(citus_internal_add_colocation_metadata);
PG_FUNCTION_INFO_V1(citus_internal_delete_colocation_metadata);
PG_FUNCTION_INFO_V1(citus_internal_add_tenant_schema);
PG_FUNCTION_INFO_V1(citus_internal_delete_tenant_schema);
static bool got_SIGTERM = false;
@ -515,7 +521,7 @@ ShouldSyncUserCommandForObject(ObjectAddress objectAddress)
/*
* ShouldSyncTableMetadata checks if the metadata of a distributed table should be
* propagated to metadata workers, i.e. the table is a hash distributed table or
* reference/citus local table.
* a Citus table that doesn't have a shard key.
*/
bool
ShouldSyncTableMetadata(Oid relationId)
@ -537,10 +543,11 @@ ShouldSyncTableMetadata(Oid relationId)
/*
* ShouldSyncTableMetadataViaCatalog checks if the metadata of a distributed table should
* be propagated to metadata workers, i.e. the table is an MX table or reference table.
* ShouldSyncTableMetadataViaCatalog checks if the metadata of a Citus table should
* be propagated to metadata workers, i.e. the table is an MX table or a Citus
* table that doesn't have a shard key.
* Tables with streaming replication model (which means RF=1) and hash distribution are
* considered as MX tables while tables with none distribution are reference tables.
* considered as MX tables.
*
* ShouldSyncTableMetadataViaCatalog does not use the CitusTableCache and instead reads
* from catalog tables directly.
@ -686,7 +693,7 @@ DropMetadataSnapshotOnNode(WorkerNode *workerNode)
bool singleTransaction = true;
List *dropMetadataCommandList = DetachPartitionCommandList();
dropMetadataCommandList = lappend(dropMetadataCommandList,
BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND);
BREAK_ALL_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND);
dropMetadataCommandList = lappend(dropMetadataCommandList,
WorkerDropAllShellTablesCommand(singleTransaction));
dropMetadataCommandList = list_concat(dropMetadataCommandList,
@ -909,15 +916,9 @@ MarkObjectsDistributedCreateCommand(List *addresses,
int forceDelegation = list_nth_int(forceDelegations, currentObjectCounter);
List *names = NIL;
List *args = NIL;
char *objectType = NULL;
#if PG_VERSION_NUM >= PG_VERSION_14
objectType = getObjectTypeDescription(address, false);
char *objectType = getObjectTypeDescription(address, false);
getObjectIdentityParts(address, &names, &args, false);
#else
objectType = getObjectTypeDescription(address);
getObjectIdentityParts(address, &names, &args);
#endif
if (!isFirstObject)
{
@ -1080,7 +1081,7 @@ EnsureObjectMetadataIsSane(int distributionArgumentIndex, int colocationId)
/*
* DistributionCreateCommand generates a command that can be
* executed to replicate the metadata for a distributed table.
* executed to replicate the metadata for a Citus table.
*/
char *
DistributionCreateCommand(CitusTableCacheEntry *cacheEntry)
@ -3701,12 +3702,14 @@ citus_internal_update_relation_colocation(PG_FUNCTION_ARGS)
"entry in pg_dist_partition.",
get_rel_name(relationId))));
}
else if (partitionMethod != DISTRIBUTE_BY_HASH)
else if (!IsCitusTableType(relationId, HASH_DISTRIBUTED) &&
!IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED))
{
/* connection from the coordinator operating on a shard */
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("Updating colocation ids are only allowed for hash "
"distributed tables: %c", partitionMethod)));
"and single shard distributed tables: %c",
partitionMethod)));
}
int count = 1;
@ -3787,6 +3790,52 @@ citus_internal_delete_colocation_metadata(PG_FUNCTION_ARGS)
}
/*
* citus_internal_add_tenant_schema is an internal UDF to
* call InsertTenantSchemaLocally on a remote node.
*
* None of the parameters are allowed to be NULL. To set the colocation
* id to NULL in metadata, use INVALID_COLOCATION_ID.
*/
Datum
citus_internal_add_tenant_schema(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
PG_ENSURE_ARGNOTNULL(0, "schema_id");
Oid schemaId = PG_GETARG_OID(0);
PG_ENSURE_ARGNOTNULL(1, "colocation_id");
uint32 colocationId = PG_GETARG_INT32(1);
InsertTenantSchemaLocally(schemaId, colocationId);
PG_RETURN_VOID();
}
/*
* citus_internal_delete_tenant_schema is an internal UDF to
* call DeleteTenantSchemaLocally on a remote node.
*
* The schemaId parameter is not allowed to be NULL. Moreover, the input schema is
* expected to be dropped already because this function is called from Citus
* drop hook and only used to clean up metadata after the schema is dropped.
*/
Datum
citus_internal_delete_tenant_schema(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
PG_ENSURE_ARGNOTNULL(0, "schema_id");
Oid schemaId = PG_GETARG_OID(0);
DeleteTenantSchemaLocally(schemaId);
PG_RETURN_VOID();
}
/*
* SyncNewColocationGroup synchronizes a new pg_dist_colocation entry to a worker.
*/
@ -3936,6 +3985,72 @@ ColocationGroupDeleteCommand(uint32 colocationId)
}
/*
* TenantSchemaInsertCommand returns a command to call
* citus_internal_add_tenant_schema().
*/
char *
TenantSchemaInsertCommand(Oid schemaId, uint32 colocationId)
{
StringInfo command = makeStringInfo();
appendStringInfo(command,
"SELECT pg_catalog.citus_internal_add_tenant_schema(%s, %u)",
RemoteSchemaIdExpressionById(schemaId), colocationId);
return command->data;
}
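As a concrete, hedged illustration (the schema name "tenant_1" and colocation id 7 are invented; schemaId is assumed to resolve to that schema), the generated command string follows directly from the format above:
char *insertTenantSchemaCommand = TenantSchemaInsertCommand(schemaId, 7);
/* insertTenantSchemaCommand now holds:
 *
 *   SELECT pg_catalog.citus_internal_add_tenant_schema('tenant_1'::regnamespace, 7)
 *
 * The schema is referenced via a ::regnamespace expression (see
 * RemoteSchemaIdExpressionById below) so the remote node resolves it to its
 * own OID for that schema.
 */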
/*
* TenantSchemaDeleteCommand returns a command to call
* citus_internal_delete_tenant_schema().
*/
char *
TenantSchemaDeleteCommand(char *schemaName)
{
StringInfo command = makeStringInfo();
appendStringInfo(command,
"SELECT pg_catalog.citus_internal_delete_tenant_schema(%s)",
RemoteSchemaIdExpressionByName(schemaName));
return command->data;
}
/*
* RemoteSchemaIdExpressionById returns an expression in text form that
* can be used to obtain the OID of the schema with given schema id on a
* different node when included in a query string.
*/
static char *
RemoteSchemaIdExpressionById(Oid schemaId)
{
char *schemaName = get_namespace_name(schemaId);
if (schemaName == NULL)
{
ereport(ERROR, (errmsg("schema with OID %u does not exist", schemaId)));
}
return RemoteSchemaIdExpressionByName(schemaName);
}
/*
* RemoteSchemaIdExpressionByName returns an expression in text form that
* can be used to obtain the OID of the schema with given schema name on a
* different node when included in a query string.
*/
static char *
RemoteSchemaIdExpressionByName(char *schemaName)
{
StringInfo regnamespaceExpr = makeStringInfo();
appendStringInfo(regnamespaceExpr, "%s::regnamespace",
quote_literal_cstr(quote_identifier(schemaName)));
return regnamespaceExpr->data;
}
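For example (hypothetical schema name), the expression builder yields a quoted literal cast to regnamespace, so the delete command above deparses accordingly:
char *schemaIdExpression = RemoteSchemaIdExpressionByName("tenant_1");
/* schemaIdExpression is the text 'tenant_1'::regnamespace, and
 * TenantSchemaDeleteCommand("tenant_1") therefore builds:
 *
 *   SELECT pg_catalog.citus_internal_delete_tenant_schema('tenant_1'::regnamespace)
 */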
/*
* SetMetadataSyncNodesFromNodeList sets the list of nodes that need to be metadata
* synced among given node list into metadataSyncContext.
@ -4235,6 +4350,22 @@ WorkerDropAllShellTablesCommand(bool singleTransaction)
}
/*
* WorkerDropSequenceDependencyCommand returns a command to drop sequence dependencies for
* given table.
*/
char *
WorkerDropSequenceDependencyCommand(Oid relationId)
{
char *qualifiedTableName = generate_qualified_relation_name(relationId);
StringInfo breakSequenceDepCommand = makeStringInfo();
appendStringInfo(breakSequenceDepCommand,
BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND,
quote_literal_cstr(qualifiedTableName));
return breakSequenceDepCommand->data;
}
/*
* PropagateNodeWideObjectsCommandList is called during node activation to
* propagate any object that should be propagated for every node. These are
@ -4314,6 +4445,14 @@ SyncDistributedObjects(MetadataSyncContext *context)
SendDistTableMetadataCommands(context);
SendDistObjectCommands(context);
/*
* Commands to insert pg_dist_schema entries.
*
* Need to be done after syncing distributed objects because the schemas
* need to exist on the worker.
*/
SendTenantSchemaMetadataCommands(context);
/*
* After creating each table, handle the inter table relationship between
* those tables.
@ -4352,8 +4491,8 @@ SendNodeWideObjectsSyncCommands(MetadataSyncContext *context)
void
SendShellTableDeletionCommands(MetadataSyncContext *context)
{
/* break all sequence deps for citus tables and remove all shell tables */
char *breakSeqDepsCommand = BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND;
/* break all sequence deps for citus tables */
char *breakSeqDepsCommand = BREAK_ALL_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND;
SendOrCollectCommandListToActivatedNodes(context, list_make1(breakSeqDepsCommand));
/* remove shell tables */
@ -4386,6 +4525,10 @@ SendMetadataDeletionCommands(MetadataSyncContext *context)
/* remove pg_dist_colocation entries */
SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_COLOCATION));
/* remove pg_dist_schema entries */
SendOrCollectCommandListToActivatedNodes(context,
list_make1(DELETE_ALL_TENANT_SCHEMAS));
}
@ -4485,6 +4628,53 @@ SendColocationMetadataCommands(MetadataSyncContext *context)
}
/*
* SendTenantSchemaMetadataCommands sends tenant schema metadata entries with
* transactional or nontransactional mode according to transactionMode inside
* metadataSyncContext.
*/
void
SendTenantSchemaMetadataCommands(MetadataSyncContext *context)
{
ScanKeyData scanKey[1];
int scanKeyCount = 0;
Relation pgDistTenantSchema = table_open(DistTenantSchemaRelationId(),
AccessShareLock);
SysScanDesc scanDesc = systable_beginscan(pgDistTenantSchema, InvalidOid, false, NULL,
scanKeyCount, scanKey);
MemoryContext oldContext = MemoryContextSwitchTo(context->context);
HeapTuple heapTuple = NULL;
while (true)
{
ResetMetadataSyncMemoryContext(context);
heapTuple = systable_getnext(scanDesc);
if (!HeapTupleIsValid(heapTuple))
{
break;
}
Form_pg_dist_schema tenantSchemaForm =
(Form_pg_dist_schema) GETSTRUCT(heapTuple);
StringInfo insertTenantSchemaCommand = makeStringInfo();
appendStringInfo(insertTenantSchemaCommand,
"SELECT pg_catalog.citus_internal_add_tenant_schema(%s, %u)",
RemoteSchemaIdExpressionById(tenantSchemaForm->schemaid),
tenantSchemaForm->colocationid);
List *commandList = list_make1(insertTenantSchemaCommand->data);
SendOrCollectCommandListToActivatedNodes(context, commandList);
}
MemoryContextSwitchTo(oldContext);
systable_endscan(scanDesc);
table_close(pgDistTenantSchema, AccessShareLock);
}
/*
* SendDependencyCreationCommands sends dependency creation commands to workers
* with transactional or nontransactional mode according to transactionMode

View File

@ -101,7 +101,7 @@ static char * GenerateAllShardStatisticsQueryForNode(WorkerNode *workerNode,
static List * GenerateShardStatisticsQueryList(List *workerNodeList, List *citusTableIds);
static void ErrorIfNotSuitableToGetSize(Oid relationId);
static List * OpenConnectionToNodes(List *workerNodeList);
static void ReceiveShardNameAndSizeResults(List *connectionList,
static void ReceiveShardIdAndSizeResults(List *connectionList,
Tuplestorestate *tupleStore,
TupleDesc tupleDescriptor);
static void AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval);
@ -253,7 +253,7 @@ GetNodeDiskSpaceStatsForConnection(MultiConnection *connection, uint64 *availabl
/*
* citus_shard_sizes returns all shard names and their sizes.
* citus_shard_sizes returns all shard ids and their sizes.
*/
Datum
citus_shard_sizes(PG_FUNCTION_ARGS)
@ -271,7 +271,7 @@ citus_shard_sizes(PG_FUNCTION_ARGS)
TupleDesc tupleDescriptor = NULL;
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
ReceiveShardNameAndSizeResults(connectionList, tupleStore, tupleDescriptor);
ReceiveShardIdAndSizeResults(connectionList, tupleStore, tupleDescriptor);
PG_RETURN_VOID();
}
@ -446,11 +446,11 @@ GenerateShardStatisticsQueryList(List *workerNodeList, List *citusTableIds)
/*
* ReceiveShardNameAndSizeResults receives shard name and size results from the given
* ReceiveShardIdAndSizeResults receives shard id and size results from the given
* connection list.
*/
static void
ReceiveShardNameAndSizeResults(List *connectionList, Tuplestorestate *tupleStore,
ReceiveShardIdAndSizeResults(List *connectionList, Tuplestorestate *tupleStore,
TupleDesc tupleDescriptor)
{
MultiConnection *connection = NULL;
@ -488,13 +488,9 @@ ReceiveShardNameAndSizeResults(List *connectionList, Tuplestorestate *tupleStore
memset(values, 0, sizeof(values));
memset(isNulls, false, sizeof(isNulls));
/* format is [0] shard id, [1] shard name, [2] size */
char *tableName = PQgetvalue(result, rowIndex, 1);
Datum resultStringDatum = CStringGetDatum(tableName);
Datum textDatum = DirectFunctionCall1(textin, resultStringDatum);
values[0] = textDatum;
values[1] = ParseIntField(result, rowIndex, 2);
/* format is [0] shard id, [1] size */
values[0] = ParseIntField(result, rowIndex, 0);
values[1] = ParseIntField(result, rowIndex, 1);
tuplestore_putvalues(tupleStore, tupleDescriptor, values, isNulls);
}
@ -942,7 +938,7 @@ GenerateAllShardStatisticsQueryForNode(WorkerNode *workerNode, List *citusTableI
}
/* Add a dummy entry so that UNION ALL doesn't complain */
appendStringInfo(allShardStatisticsQuery, "SELECT 0::bigint, NULL::text, 0::bigint;");
appendStringInfo(allShardStatisticsQuery, "SELECT 0::bigint, 0::bigint;");
return allShardStatisticsQuery->data;
}
@ -986,7 +982,6 @@ AppendShardSizeQuery(StringInfo selectQuery, ShardInterval *shardInterval)
char *quotedShardName = quote_literal_cstr(shardQualifiedName);
appendStringInfo(selectQuery, "SELECT " UINT64_FORMAT " AS shard_id, ", shardId);
appendStringInfo(selectQuery, "%s AS shard_name, ", quotedShardName);
appendStringInfo(selectQuery, PG_TOTAL_RELATION_SIZE_FUNCTION, quotedShardName);
}
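As a hedged sketch (shard id and relation name invented, and assuming the PG_TOTAL_RELATION_SIZE_FUNCTION macro expands to a pg_total_relation_size(%s) call), the per-shard fragment appended here now carries only the shard id and its size:
/*
 *   SELECT 102008 AS shard_id, pg_total_relation_size('public.target_102008')
 */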
@ -2256,6 +2251,21 @@ EnsureTableOwner(Oid relationId)
}
/*
* Check that the current user has owner rights to schemaId, error out if
* not. Superusers are regarded as owners.
*/
void
EnsureSchemaOwner(Oid schemaId)
{
if (!pg_namespace_ownercheck(schemaId, GetUserId()))
{
aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
get_namespace_name(schemaId));
}
}
/*
* Check that the current user has owner rights to functionId, error out if
* not. Superusers are regarded as owners. Functions and procedures are
@ -2288,6 +2298,24 @@ EnsureHashDistributedTable(Oid relationId)
}
/*
* EnsureHashOrSingleShardDistributedTable errors out if the given relation is not a
* hash or single shard distributed table.
*/
void
EnsureHashOrSingleShardDistributedTable(Oid relationId)
{
if (!IsCitusTableType(relationId, HASH_DISTRIBUTED) &&
!IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("relation %s should be a "
"hash or single shard distributed table",
get_rel_name(relationId))));
}
}
/*
* EnsureSuperUser check that the current user is a superuser and errors out if not.
*/
@ -4003,11 +4031,7 @@ CancelTasksForJob(int64 jobid)
errmsg("must be a superuser to cancel superuser tasks")));
}
else if (!has_privs_of_role(GetUserId(), taskOwner) &&
#if PG_VERSION_NUM >= 140000
!has_privs_of_role(GetUserId(), ROLE_PG_SIGNAL_BACKEND))
#else
!has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID))
#endif
{
/* user doesn't have the permissions to cancel this job */
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),

View File

@ -36,6 +36,7 @@
#include "distributed/multi_join_order.h"
#include "distributed/multi_router_planner.h"
#include "distributed/pg_dist_node.h"
#include "distributed/pg_dist_node_metadata.h"
#include "distributed/reference_table_utils.h"
#include "distributed/remote_commands.h"
#include "distributed/resource_lock.h"
@ -119,7 +120,6 @@ static char * NodeMetadataSyncedUpdateCommand(uint32 nodeId, bool metadataSynced
static void ErrorIfCoordinatorMetadataSetFalse(WorkerNode *workerNode, Datum value,
char *field);
static WorkerNode * SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards);
static int FindCoordinatorNodeId(void);
static WorkerNode * FindNodeAnyClusterByNodeId(uint32 nodeId);
static void ErrorIfAnyNodeNotExist(List *nodeList);
static void UpdateLocalGroupIdsViaMetadataContext(MetadataSyncContext *context);
@ -1800,7 +1800,7 @@ FindNodeWithNodeId(int nodeId, bool missingOk)
/*
* FindCoordinatorNodeId returns the node id of the coordinator node
*/
static int
int
FindCoordinatorNodeId()
{
bool includeNodesFromOtherClusters = false;
@ -2871,15 +2871,15 @@ TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple)
*/
heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray);
char *nodeName = DatumGetCString(datumArray[Anum_pg_dist_node_nodename - 1]);
char *nodeRack = DatumGetCString(datumArray[Anum_pg_dist_node_noderack - 1]);
char *nodeName = TextDatumGetCString(datumArray[Anum_pg_dist_node_nodename - 1]);
char *nodeRack = TextDatumGetCString(datumArray[Anum_pg_dist_node_noderack - 1]);
WorkerNode *workerNode = (WorkerNode *) palloc0(sizeof(WorkerNode));
workerNode->nodeId = DatumGetUInt32(datumArray[Anum_pg_dist_node_nodeid - 1]);
workerNode->workerPort = DatumGetUInt32(datumArray[Anum_pg_dist_node_nodeport - 1]);
workerNode->groupId = DatumGetInt32(datumArray[Anum_pg_dist_node_groupid - 1]);
strlcpy(workerNode->workerName, TextDatumGetCString(nodeName), WORKER_LENGTH);
strlcpy(workerNode->workerRack, TextDatumGetCString(nodeRack), WORKER_LENGTH);
strlcpy(workerNode->workerName, nodeName, WORKER_LENGTH);
strlcpy(workerNode->workerRack, nodeRack, WORKER_LENGTH);
workerNode->hasMetadata = DatumGetBool(datumArray[Anum_pg_dist_node_hasmetadata - 1]);
workerNode->metadataSynced =
DatumGetBool(datumArray[Anum_pg_dist_node_metadatasynced - 1]);

View File

@ -217,9 +217,9 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
List *insertedShardPlacements = NIL;
List *insertedShardIds = NIL;
/* make sure that tables are hash partitioned */
CheckHashPartitionedTable(targetRelationId);
CheckHashPartitionedTable(sourceRelationId);
CitusTableCacheEntry *targetCacheEntry = GetCitusTableCacheEntry(targetRelationId);
Assert(targetCacheEntry->partitionMethod == DISTRIBUTE_BY_HASH ||
targetCacheEntry->partitionMethod == DISTRIBUTE_BY_NONE);
/*
* In contrast to append/range partitioned tables it makes more sense to
@ -259,10 +259,20 @@ CreateColocatedShards(Oid targetRelationId, Oid sourceRelationId, bool
*newShardIdPtr = GetNextShardId();
insertedShardIds = lappend(insertedShardIds, newShardIdPtr);
text *shardMinValueText = NULL;
text *shardMaxValueText = NULL;
if (targetCacheEntry->partitionMethod == DISTRIBUTE_BY_NONE)
{
Assert(list_length(sourceShardIntervalList) == 1);
}
else
{
int32 shardMinValue = DatumGetInt32(sourceShardInterval->minValue);
int32 shardMaxValue = DatumGetInt32(sourceShardInterval->maxValue);
text *shardMinValueText = IntegerToText(shardMinValue);
text *shardMaxValueText = IntegerToText(shardMaxValue);
shardMinValueText = IntegerToText(shardMinValue);
shardMaxValueText = IntegerToText(shardMaxValue);
}
List *sourceShardPlacementList = ShardPlacementListSortedByWorker(
sourceShardId);
@ -362,6 +372,72 @@ CreateReferenceTableShard(Oid distributedTableId)
}
/*
* CreateSingleShardTableShardWithRoundRobinPolicy creates a single
* shard for the given distributedTableId. The created shard does not
* have min/max values. Unlike CreateReferenceTableShard, the shard is
* _not_ replicated to all nodes but would have a single placement like
* Citus local tables.
*
* However, this placement doesn't necessarily need to be on the
* coordinator. The node is determined based on the modulo of the
* colocation id that the given table has been associated with.
*/
void
CreateSingleShardTableShardWithRoundRobinPolicy(Oid relationId, uint32 colocationId)
{
EnsureTableOwner(relationId);
/* we plan to add shards: get an exclusive lock on relation oid */
LockRelationOid(relationId, ExclusiveLock);
/*
* Load and sort the worker node list for deterministic placement.
*
* Also take a RowShareLock on pg_dist_node to disallow concurrent
* node list changes that require an exclusive lock.
*/
List *workerNodeList = DistributedTablePlacementNodeList(RowShareLock);
workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
int32 workerNodeCount = list_length(workerNodeList);
if (workerNodeCount == 0)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("couldn't find any worker nodes"),
errhint("Add more worker nodes")));
}
char shardStorageType = ShardStorageType(relationId);
text *minHashTokenText = NULL;
text *maxHashTokenText = NULL;
uint64 shardId = GetNextShardId();
InsertShardRow(relationId, shardId, shardStorageType,
minHashTokenText, maxHashTokenText);
/* determine the node index based on colocation id */
int roundRobinNodeIdx = colocationId % workerNodeCount;
int replicationFactor = 1;
List *insertedShardPlacements = InsertShardPlacementRows(
relationId,
shardId,
workerNodeList,
roundRobinNodeIdx,
replicationFactor);
/*
* We don't need to force using exclusive connections because we're anyway
* creating a single shard.
*/
bool useExclusiveConnection = false;
bool colocatedShard = false;
CreateShardsOnWorkers(relationId, insertedShardPlacements,
useExclusiveConnection, colocatedShard);
}
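A minimal standalone C sketch (not part of this diff) of the placement rule used above: the chosen node index is simply the colocation id modulo the number of candidate worker nodes, so every table in the same colocation group lands on the same node while different groups spread out round-robin. The helper and worker names below are hypothetical.

#include <stdio.h>
#include <stdint.h>

static int
PickRoundRobinNodeIndex(uint32_t colocationId, int workerNodeCount)
{
	/* mirrors: roundRobinNodeIdx = colocationId % workerNodeCount */
	return (int) (colocationId % (uint32_t) workerNodeCount);
}

int
main(void)
{
	const char *sortedWorkers[] = { "worker-1", "worker-2", "worker-3" };
	int workerCount = 3;

	for (uint32_t colocationId = 1; colocationId <= 5; colocationId++)
	{
		int nodeIndex = PickRoundRobinNodeIndex(colocationId, workerCount);
		printf("colocation %u -> %s\n", colocationId, sortedWorkers[nodeIndex]);
	}

	return 0;
}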
/*
* CheckHashPartitionedTable looks up the partition information for the given
* tableId and checks if the table is hash partitioned. If not, the function

View File

@ -319,6 +319,7 @@ PG_FUNCTION_INFO_V1(citus_rebalance_wait);
bool RunningUnderIsolationTest = false;
int MaxRebalancerLoggedIgnoredMoves = 5;
int RebalancerByDiskSizeBaseCost = 100 * 1024 * 1024;
bool PropagateSessionSettingsForLoopbackConnection = false;
static const char *PlacementUpdateTypeNames[] = {
@ -515,6 +516,16 @@ GetRebalanceSteps(RebalanceOptions *options)
/* sort the lists to make the function more deterministic */
List *activeWorkerList = SortedActiveWorkers();
int shardAllowedNodeCount = 0;
WorkerNode *workerNode = NULL;
foreach_ptr(workerNode, activeWorkerList)
{
if (workerNode->shouldHaveShards)
{
shardAllowedNodeCount++;
}
}
List *activeShardPlacementListList = NIL;
List *unbalancedShards = NIL;
@ -532,8 +543,7 @@ GetRebalanceSteps(RebalanceOptions *options)
shardPlacementList, options->workerNode);
}
if (list_length(activeShardPlacementListForRelation) >= list_length(
activeWorkerList))
if (list_length(activeShardPlacementListForRelation) >= shardAllowedNodeCount)
{
activeShardPlacementListList = lappend(activeShardPlacementListList,
activeShardPlacementListForRelation);
@ -668,6 +678,8 @@ citus_shard_cost_by_disk_size(PG_FUNCTION_ARGS)
MemoryContextSwitchTo(oldContext);
MemoryContextReset(localContext);
colocationSizeInBytes += RebalancerByDiskSizeBaseCost;
if (colocationSizeInBytes <= 0)
{
PG_RETURN_FLOAT4(1);
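A small self-contained sketch (plain C, no Postgres types; the constant and function names are stand-ins) of the cost adjustment above: every colocation group gets a flat base cost added to its on-disk size, 100 MiB by default per RebalancerByDiskSizeBaseCost, so moving tiny or empty shard groups is never treated as free.

#include <stdio.h>
#include <stdint.h>

static const int64_t REBALANCER_DISK_SIZE_BASE_COST = 100LL * 1024 * 1024;

static double
ShardCostByDiskSize(int64_t colocationSizeInBytes)
{
	/* mirrors: colocationSizeInBytes += RebalancerByDiskSizeBaseCost */
	colocationSizeInBytes += REBALANCER_DISK_SIZE_BASE_COST;
	if (colocationSizeInBytes <= 0)
	{
		return 1.0;
	}
	return (double) colocationSizeInBytes;
}

int
main(void)
{
	printf("empty group cost: %.0f\n", ShardCostByDiskSize(0));
	printf("1 GiB group cost: %.0f\n", ShardCostByDiskSize(1024LL * 1024 * 1024));
	return 0;
}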
@ -1169,6 +1181,11 @@ replicate_table_shards(PG_FUNCTION_ARGS)
ArrayType *excludedShardArray = PG_GETARG_ARRAYTYPE_P(3);
Oid shardReplicationModeOid = PG_GETARG_OID(4);
if (IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED))
{
ereport(ERROR, (errmsg("cannot replicate single shard tables' shards")));
}
char transferMode = LookupShardTransferMode(shardReplicationModeOid);
EnsureReferenceTablesExistOnAllNodesExtended(transferMode);
@ -2003,7 +2020,7 @@ GenerateTaskMoveDependencyList(PlacementUpdateEvent *move, int64 colocationId,
* overlaps with the current move's target node.
* The earlier/first move might make space for the later/second move.
* So we could run out of disk space (or at least overload the node)
* if we move the second shard to it before the first one is moved away. 
* if we move the second shard to it before the first one is moved away.
*/
ShardMoveSourceNodeHashEntry *shardMoveSourceNodeHashEntry = hash_search(
shardMoveDependencies.nodeDependencies, &move->targetNode->nodeId, HASH_FIND,
@ -2165,7 +2182,10 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
quote_literal_cstr(shardTranferModeLabel));
int32 nodesInvolved[] = { 0 };
BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, 0,
/* replicate_reference_tables permissions require superuser */
Oid superUserId = CitusExtensionOwner();
BackgroundTask *task = ScheduleBackgroundTask(jobId, superUserId, buf.data, 0,
NULL, 0, nodesInvolved);
replicateRefTablesTaskId = task->taskid;
}
@ -2268,7 +2288,7 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent,
if (updateType == PLACEMENT_UPDATE_MOVE)
{
appendStringInfo(placementUpdateCommand,
"SELECT citus_move_shard_placement(%ld,%u,%u,%s)",
"SELECT pg_catalog.citus_move_shard_placement(%ld,%u,%u,%s)",
shardId,
sourceNode->nodeId,
targetNode->nodeId,
@ -2277,7 +2297,7 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent,
else if (updateType == PLACEMENT_UPDATE_COPY)
{
appendStringInfo(placementUpdateCommand,
"SELECT citus_copy_shard_placement(%ld,%u,%u,%s)",
"SELECT pg_catalog.citus_copy_shard_placement(%ld,%u,%u,%s)",
shardId,
sourceNode->nodeId,
targetNode->nodeId,

View File

@ -138,6 +138,13 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
errdetail("We currently don't support creating shards "
"on hash-partitioned tables")));
}
else if (IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED))
{
ereport(ERROR, (errmsg("relation \"%s\" is a single shard table",
relationName),
errdetail("We currently don't support creating shards "
"on single shard tables")));
}
else if (IsCitusTableType(relationId, REFERENCE_TABLE))
{
ereport(ERROR, (errmsg("relation \"%s\" is a reference table",
@ -521,7 +528,8 @@ RelationShardListForShardCreate(ShardInterval *shardInterval)
relationShard->shardId = shardInterval->shardId;
List *relationShardList = list_make1(relationShard);
if (IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) &&
if ((IsCitusTableTypeCacheEntry(cacheEntry, HASH_DISTRIBUTED) ||
IsCitusTableTypeCacheEntry(cacheEntry, SINGLE_SHARD_DISTRIBUTED)) &&
cacheEntry->colocationId != INVALID_COLOCATION_ID)
{
shardIndex = ShardIndex(shardInterval);
@ -855,7 +863,7 @@ ProcessShardStatisticsRow(PGresult *result, int64 rowIndex, uint64 *shardId,
return false;
}
*shardSize = ParseIntField(result, rowIndex, 2);
*shardSize = ParseIntField(result, rowIndex, 1);
return true;
}

View File

@ -31,11 +31,7 @@
#include "utils/guc.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#if PG_VERSION_NUM < PG_VERSION_13
#include "utils/hashutils.h"
#else
#include "common/hashfn.h"
#endif
/* Config variables managed via guc.c */

View File

@ -527,7 +527,7 @@ LocalCopyToShard(ShardCopyDestReceiver *copyDest, CopyOutState localCopyOutState
false /* inFromCl */);
List *options = (isBinaryCopy) ? list_make1(binaryFormatOption) : NULL;
CopyFromState cstate = BeginCopyFrom_compat(pState, shard,
CopyFromState cstate = BeginCopyFrom(pState, shard,
NULL /* whereClause */,
NULL /* fileName */,
false /* is_program */,

View File

@ -43,7 +43,7 @@ static bool contain_dml_walker(Node *node, void *context);
/* the following utility functions are related to Citus' logic */
static bool RecursivelyInlineCteWalker(Node *node, void *context);
static void InlineCTEsInQueryTree(Query *query);
static bool QueryTreeContainsInlinableCteWalker(Node *node);
static bool QueryTreeContainsInlinableCteWalker(Node *node, void *context);
/*
@ -135,7 +135,7 @@ InlineCTEsInQueryTree(Query *query)
bool
QueryTreeContainsInlinableCTE(Query *queryTree)
{
return QueryTreeContainsInlinableCteWalker((Node *) queryTree);
return QueryTreeContainsInlinableCteWalker((Node *) queryTree, NULL);
}
@ -144,7 +144,7 @@ QueryTreeContainsInlinableCTE(Query *queryTree)
* the (sub)queries in the node contains at least one CTE.
*/
static bool
QueryTreeContainsInlinableCteWalker(Node *node)
QueryTreeContainsInlinableCteWalker(Node *node, void *context)
{
if (node == NULL)
{

View File

@ -925,6 +925,10 @@ GetRouterPlanType(Query *query, Query *originalQuery, bool hasUnresolvedParams)
}
else if (IsMergeQuery(originalQuery))
{
if (hasUnresolvedParams)
{
return REPLAN_WITH_BOUND_PARAMETERS;
}
return MERGE_QUERY;
}
else
@ -990,7 +994,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
case MERGE_QUERY:
{
distributedPlan =
CreateMergePlan(originalQuery, query, plannerRestrictionContext);
CreateMergePlan(planId, originalQuery, query, plannerRestrictionContext,
boundParams);
break;
}
@ -1377,6 +1382,12 @@ FinalizePlan(PlannedStmt *localPlan, DistributedPlan *distributedPlan)
break;
}
case MULTI_EXECUTOR_NON_PUSHABLE_MERGE_QUERY:
{
customScan->methods = &NonPushableMergeCommandCustomScanMethods;
break;
}
default:
{
customScan->methods = &DelayedErrorCustomScanMethods;
@ -2462,6 +2473,18 @@ HasUnresolvedExternParamsWalker(Node *expression, ParamListInfo boundParams)
}
/*
* ContainsSingleShardTable returns true if the given query contains a reference
* to a single-shard table.
*/
bool
ContainsSingleShardTable(Query *query)
{
RTEListProperties *rteListProperties = GetRTEListPropertiesForQuery(query);
return rteListProperties->hasSingleShardDistTable;
}
/*
* GetRTEListPropertiesForQuery is a wrapper around GetRTEListProperties that
* returns RTEListProperties for the rte list retrieved from query.
@ -2538,6 +2561,15 @@ GetRTEListProperties(List *rangeTableList)
else if (IsCitusTableTypeCacheEntry(cacheEntry, DISTRIBUTED_TABLE))
{
rteListProperties->hasDistributedTable = true;
if (!HasDistributionKeyCacheEntry(cacheEntry))
{
rteListProperties->hasSingleShardDistTable = true;
}
else
{
rteListProperties->hasDistTableWithShardKey = true;
}
}
else
{

View File

@ -108,7 +108,7 @@ GeneratePlaceHolderPlannedStmt(Query *parse)
Node *distKey PG_USED_FOR_ASSERTS_ONLY = NULL;
AssertArg(FastPathRouterQuery(parse, &distKey));
Assert(FastPathRouterQuery(parse, &distKey));
/* there is only a single relation rte */
scanNode->scanrelid = 1;
@ -212,6 +212,16 @@ FastPathRouterQuery(Query *query, Node **distributionKeyValue)
return false;
}
/*
* If the table doesn't have a distribution column, we don't need to
* check anything further.
*/
Var *distributionKey = PartitionColumn(distributedTableId, 1);
if (!distributionKey)
{
return true;
}
/* WHERE clause should not be empty for distributed tables */
if (joinTree == NULL ||
(IsCitusTableTypeCacheEntry(cacheEntry, DISTRIBUTED_TABLE) && joinTree->quals ==
@ -220,13 +230,6 @@ FastPathRouterQuery(Query *query, Node **distributionKeyValue)
return false;
}
/* if that's a reference table, we don't need to check anything further */
Var *distributionKey = PartitionColumn(distributedTableId, 1);
if (!distributionKey)
{
return true;
}
/* convert list of expressions into expression tree for further processing */
quals = joinTree->quals;
if (quals != NULL && IsA(quals, List))

View File

@ -116,7 +116,6 @@ contain_param_walker(Node *node, void *context)
PlannedStmt *
TryToDelegateFunctionCall(DistributedPlanningContext *planContext)
{
bool colocatedWithReferenceTable = false;
ShardPlacement *placement = NULL;
struct ParamWalkerContext walkerParamContext = { 0 };
bool inTransactionBlock = false;
@ -337,7 +336,7 @@ TryToDelegateFunctionCall(DistributedPlanningContext *planContext)
if (!procedure->forceDelegation)
{
/* cannot delegate function calls in a multi-statement transaction */
ereport(DEBUG1, (errmsg("not pushing down function calls in "
ereport(DEBUG4, (errmsg("not pushing down function calls in "
"a multi-statement transaction")));
return NULL;
}
@ -388,17 +387,10 @@ TryToDelegateFunctionCall(DistributedPlanningContext *planContext)
Oid colocatedRelationId = ColocatedTableId(procedure->colocationId);
if (colocatedRelationId == InvalidOid)
{
ereport(DEBUG1, (errmsg("function does not have co-located tables")));
ereport(DEBUG4, (errmsg("function does not have co-located tables")));
return NULL;
}
CitusTableCacheEntry *distTable = GetCitusTableCacheEntry(colocatedRelationId);
Var *partitionColumn = distTable->partitionColumn;
if (partitionColumn == NULL)
{
colocatedWithReferenceTable = true;
}
/*
* This can be called in queries like SELECT ... WHERE EXISTS(SELECT func()), or other
* forms of CTEs or subqueries. We don't push-down in those cases.
@ -410,14 +402,20 @@ TryToDelegateFunctionCall(DistributedPlanningContext *planContext)
return NULL;
}
if (colocatedWithReferenceTable)
CitusTableCacheEntry *distTable = GetCitusTableCacheEntry(colocatedRelationId);
if (IsCitusTableType(colocatedRelationId, REFERENCE_TABLE))
{
placement = ShardPlacementForFunctionColocatedWithReferenceTable(distTable);
}
else if (IsCitusTableType(colocatedRelationId, SINGLE_SHARD_DISTRIBUTED))
{
placement = ShardPlacementForFunctionColocatedWithSingleShardTable(distTable);
}
else
{
placement = ShardPlacementForFunctionColocatedWithDistTable(procedure,
funcExpr->args,
distTable->
partitionColumn,
distTable,
planContext->plan);
@ -570,6 +568,34 @@ ShardPlacementForFunctionColocatedWithDistTable(DistObjectCacheEntry *procedure,
}
/*
* ShardPlacementForFunctionColocatedWithSingleShardTable decides on a placement
* for delegating a function call that reads from a single shard table.
*/
ShardPlacement *
ShardPlacementForFunctionColocatedWithSingleShardTable(CitusTableCacheEntry *cacheEntry)
{
const ShardInterval *shardInterval = cacheEntry->sortedShardIntervalArray[0];
if (shardInterval == NULL)
{
ereport(DEBUG1, (errmsg("cannot push down call, failed to find shard interval")));
return NULL;
}
List *placementList = ActiveShardPlacementList(shardInterval->shardId);
if (list_length(placementList) != 1)
{
/* punt on this for now */
ereport(DEBUG1, (errmsg(
"cannot push down function call for replicated distributed tables")));
return NULL;
}
return (ShardPlacement *) linitial(placementList);
}
/*
* ShardPlacementForFunctionColocatedWithReferenceTable decides on a placement for delegating
* a function call that reads from a reference table.

View File

@ -31,6 +31,7 @@
#include "distributed/pg_dist_partition.h"
#include "distributed/query_pushdown_planning.h"
#include "distributed/recursive_planning.h"
#include "distributed/repartition_executor.h"
#include "distributed/resource_lock.h"
#include "distributed/version_compat.h"
#include "nodes/makefuncs.h"
@ -73,9 +74,9 @@ static List * CreateTargetListForCombineQuery(List *targetList);
static DeferredErrorMessage * DistributedInsertSelectSupported(Query *queryTree,
RangeTblEntry *insertRte,
RangeTblEntry *subqueryRte,
bool allReferenceTables);
static DeferredErrorMessage * MultiTaskRouterSelectQuerySupported(Query *query);
static bool HasUnsupportedDistinctOn(Query *query);
bool allReferenceTables,
PlannerRestrictionContext *
plannerRestrictionContext);
static DeferredErrorMessage * InsertPartitionColumnMatchesSelect(Query *query,
RangeTblEntry *insertRte,
RangeTblEntry *
@ -85,7 +86,6 @@ static DeferredErrorMessage * InsertPartitionColumnMatchesSelect(Query *query,
static DistributedPlan * CreateNonPushableInsertSelectPlan(uint64 planId, Query *parse,
ParamListInfo boundParams);
static DeferredErrorMessage * NonPushableInsertSelectSupported(Query *insertSelectQuery);
static Query * WrapSubquery(Query *subquery);
static void RelabelTargetEntryList(List *selectTargetList, List *insertTargetList);
static List * AddInsertSelectCasts(List *insertTargetList, List *selectTargetList,
Oid targetRelationId);
@ -292,7 +292,8 @@ CreateDistributedInsertSelectPlan(Query *originalQuery,
distributedPlan->planningError = DistributedInsertSelectSupported(originalQuery,
insertRte,
subqueryRte,
allReferenceTables);
allReferenceTables,
plannerRestrictionContext);
if (distributedPlan->planningError)
{
return distributedPlan;
@ -613,14 +614,15 @@ CreateTargetListForCombineQuery(List *targetList)
*/
static DeferredErrorMessage *
DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
RangeTblEntry *subqueryRte, bool allReferenceTables)
RangeTblEntry *subqueryRte, bool allReferenceTables,
PlannerRestrictionContext *plannerRestrictionContext)
{
Oid selectPartitionColumnTableId = InvalidOid;
Oid targetRelationId = insertRte->relid;
ListCell *rangeTableCell = NULL;
/* we only do this check for INSERT ... SELECT queries */
AssertArg(InsertSelectIntoCitusTable(queryTree));
Assert(InsertSelectIntoCitusTable(queryTree));
Query *subquery = subqueryRte->subquery;
@ -687,8 +689,16 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
NULL, NULL);
}
/* we don't support LIMIT, OFFSET and WINDOW functions */
DeferredErrorMessage *error = MultiTaskRouterSelectQuerySupported(subquery);
/* first apply toplevel pushdown checks to SELECT query */
DeferredErrorMessage *error = DeferErrorIfUnsupportedSubqueryPushdown(subquery,
plannerRestrictionContext);
if (error)
{
return error;
}
/* then apply subquery pushdown checks to SELECT query */
error = DeferErrorIfCannotPushdownSubquery(subquery, false);
if (error)
{
return error;
@ -730,6 +740,8 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
"table", NULL, NULL);
}
if (HasDistributionKey(targetRelationId))
{
/* ensure that INSERT's partition column comes from SELECT's partition column */
error = InsertPartitionColumnMatchesSelect(queryTree, insertRte, subqueryRte,
&selectPartitionColumnTableId);
@ -737,20 +749,21 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
{
return error;
}
}
}
/*
* We expect partition column values come from colocated tables. Note that we
* skip this check from the reference table case given that all reference tables
* are already (and by default) co-located.
*/
if (!TablesColocated(insertRte->relid, selectPartitionColumnTableId))
/* All tables in source list and target table should be colocated. */
List *distributedRelationIdList = DistributedRelationIdList(subquery);
distributedRelationIdList = lappend_oid(distributedRelationIdList,
targetRelationId);
if (!AllDistributedRelationsInListColocated(distributedRelationIdList))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"INSERT target table and the source relation of the SELECT partition "
"column value must be colocated in distributed INSERT ... SELECT",
"INSERT target relation and all source relations of the "
"SELECT must be colocated in distributed INSERT ... SELECT",
NULL, NULL);
}
}
return NULL;
}
@ -848,7 +861,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery,
* Note that this is only the case with PG14 as the parameter doesn't exist
* prior to that.
*/
shardRestrictionList = make_simple_restrictinfo_compat(NULL,
shardRestrictionList = make_simple_restrictinfo(NULL,
(Expr *) shardOpExpressions);
extendedBaseRestrictInfo = lappend(extendedBaseRestrictInfo,
shardRestrictionList);
@ -867,7 +880,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery,
*/
RTEListProperties *subqueryRteListProperties = GetRTEListPropertiesForQuery(
copiedSubquery);
if (subqueryRteListProperties->hasDistributedTable)
if (subqueryRteListProperties->hasDistTableWithShardKey)
{
AddPartitionKeyNotNullFilterToSelect(copiedSubquery);
}
@ -1107,152 +1120,6 @@ ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte,
}
/*
* MultiTaskRouterSelectQuerySupported returns NULL if the query may be used
* as the source for an INSERT ... SELECT or returns a description why not.
*/
static DeferredErrorMessage *
MultiTaskRouterSelectQuerySupported(Query *query)
{
List *queryList = NIL;
ListCell *queryCell = NULL;
StringInfo errorDetail = NULL;
bool hasUnsupportedDistinctOn = false;
ExtractQueryWalker((Node *) query, &queryList);
foreach(queryCell, queryList)
{
Query *subquery = (Query *) lfirst(queryCell);
Assert(subquery->commandType == CMD_SELECT);
/* pushing down rtes without relations yields (shardCount * expectedRows) */
if (HasEmptyJoinTree(subquery))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"Subqueries without relations are not allowed in "
"distributed INSERT ... SELECT queries",
NULL, NULL);
}
/* pushing down limit per shard would yield wrong results */
if (subquery->limitCount != NULL)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"LIMIT clauses are not allowed in distributed INSERT "
"... SELECT queries",
NULL, NULL);
}
/* pushing down limit offest per shard would yield wrong results */
if (subquery->limitOffset != NULL)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"OFFSET clauses are not allowed in distributed "
"INSERT ... SELECT queries",
NULL, NULL);
}
/* group clause list must include partition column */
if (subquery->groupClause)
{
List *groupClauseList = subquery->groupClause;
List *targetEntryList = subquery->targetList;
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
targetEntryList);
bool groupOnPartitionColumn = TargetListOnPartitionColumn(subquery,
groupTargetEntryList);
if (!groupOnPartitionColumn)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"Group by list without distribution column is "
"not allowed in distributed INSERT ... "
"SELECT queries",
NULL, NULL);
}
}
/*
* We support window functions when the window function
* is partitioned on distribution column.
*/
if (subquery->windowClause && !SafeToPushdownWindowFunction(subquery,
&errorDetail))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorDetail->data, NULL,
NULL);
}
if (subquery->setOperations != NULL)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"Set operations are not allowed in distributed "
"INSERT ... SELECT queries",
NULL, NULL);
}
/*
* We currently do not support grouping sets since it could generate NULL
* results even after the restrictions are applied to the query. A solution
* would be to add the whole query into a subquery and add the restrictions
* on that subquery.
*/
if (subquery->groupingSets != NULL)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"grouping sets are not allowed in distributed "
"INSERT ... SELECT queries",
NULL, NULL);
}
/*
* We don't support DISTINCT ON clauses on non-partition columns.
*/
hasUnsupportedDistinctOn = HasUnsupportedDistinctOn(subquery);
if (hasUnsupportedDistinctOn)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"DISTINCT ON (non-partition column) clauses are not "
"allowed in distributed INSERT ... SELECT queries",
NULL, NULL);
}
}
return NULL;
}
/*
* HasUnsupportedDistinctOn returns true if the query has distinct on and
* distinct targets do not contain partition column.
*/
static bool
HasUnsupportedDistinctOn(Query *query)
{
ListCell *distinctCell = NULL;
if (!query->hasDistinctOn)
{
return false;
}
foreach(distinctCell, query->distinctClause)
{
SortGroupClause *distinctClause = lfirst(distinctCell);
TargetEntry *distinctEntry = get_sortgroupclause_tle(distinctClause,
query->targetList);
bool skipOuterVars = true;
if (IsPartitionColumn(distinctEntry->expr, query, skipOuterVars))
{
return false;
}
}
return true;
}
/*
* InsertPartitionColumnMatchesSelect returns NULL if the partition column in the
* table targeted by the INSERT matches any of the SELECTed table's
@ -1537,11 +1404,22 @@ CreateNonPushableInsertSelectPlan(uint64 planId, Query *parse, ParamListInfo bou
bool repartitioned = IsRedistributablePlan(selectPlan->planTree) &&
IsSupportedRedistributionTarget(targetRelationId);
distributedPlan->insertSelectQuery = insertSelectQuery;
distributedPlan->selectPlanForInsertSelect = selectPlan;
distributedPlan->insertSelectMethod = repartitioned ?
INSERT_SELECT_REPARTITION :
INSERT_SELECT_VIA_COORDINATOR;
/*
* It's not possible to generate a distributed plan for a SELECT
* having more than one task if it references a single-shard table.
*
* For this reason, right now we don't expect an INSERT .. SELECT
* query to go through the repartitioned INSERT .. SELECT logic if the
* SELECT query references a single-shard table.
*/
Assert(!repartitioned ||
!ContainsSingleShardTable(selectQueryCopy));
distributedPlan->modifyQueryViaCoordinatorOrRepartition = insertSelectQuery;
distributedPlan->selectPlanForModifyViaCoordinatorOrRepartition = selectPlan;
distributedPlan->modifyWithSelectMethod = repartitioned ?
MODIFY_WITH_SELECT_REPARTITION :
MODIFY_WITH_SELECT_VIA_COORDINATOR;
distributedPlan->expectResults = insertSelectQuery->returningList != NIL;
distributedPlan->intermediateResultIdPrefix = InsertSelectResultIdPrefix(planId);
distributedPlan->targetRelationId = targetRelationId;
@ -1598,7 +1476,7 @@ InsertSelectResultIdPrefix(uint64 planId)
* WrapSubquery wraps the given query as a subquery in a newly constructed
* "SELECT * FROM (...subquery...) citus_insert_select_subquery" query.
*/
static Query *
Query *
WrapSubquery(Query *subquery)
{
ParseState *pstate = make_parsestate(NULL);

View File

@ -69,7 +69,7 @@ FindSubPlanUsages(DistributedPlan *plan)
SUBPLAN_ACCESS_REMOTE);
}
if (plan->insertSelectQuery != NULL)
if (plan->modifyQueryViaCoordinatorOrRepartition != NULL)
{
/* INSERT..SELECT plans currently do not have a workerJob */
Assert(plan->workerJob == NULL);
@ -79,7 +79,8 @@ FindSubPlanUsages(DistributedPlan *plan)
* perform pruning. We therefore require all subplans used in the
* INSERT..SELECT to be available on all nodes.
*/
remoteSubPlans = FindSubPlansUsedInNode((Node *) plan->insertSelectQuery,
remoteSubPlans =
FindSubPlansUsedInNode((Node *) plan->modifyQueryViaCoordinatorOrRepartition,
SUBPLAN_ACCESS_ANYWHERE);
}

View File

@ -485,6 +485,8 @@ RequiredAttrNumbersForRelation(RangeTblEntry *rangeTableEntry,
PlannerInfo *plannerInfo = relationRestriction->plannerInfo;
int rteIndex = relationRestriction->index;
/*
* Here we used the query from plannerInfo because it has the optimizations
* so that it doesn't have unnecessary columns. The original query doesn't have
@ -492,8 +494,18 @@ RequiredAttrNumbersForRelation(RangeTblEntry *rangeTableEntry,
* 'required' attributes.
*/
Query *queryToProcess = plannerInfo->parse;
int rteIndex = relationRestriction->index;
return RequiredAttrNumbersForRelationInternal(queryToProcess, rteIndex);
}
/*
* RequiredAttrNumbersForRelationInternal returns the required attribute numbers
* for the input range-table-index in the query parameter.
*/
List *
RequiredAttrNumbersForRelationInternal(Query *queryToProcess, int rteIndex)
{
List *allVarsInQuery = pull_vars_of_level((Node *) queryToProcess, 0);
List *requiredAttrNumbers = NIL;

File diff suppressed because it is too large

View File

@ -33,6 +33,7 @@
#include "distributed/insert_select_planner.h"
#include "distributed/insert_select_executor.h"
#include "distributed/listutils.h"
#include "distributed/merge_planner.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_explain.h"
#include "distributed/multi_logical_optimizer.h"
@ -234,7 +235,7 @@ NonPushableInsertSelectExplainScan(CustomScanState *node, List *ancestors,
{
CitusScanState *scanState = (CitusScanState *) node;
DistributedPlan *distributedPlan = scanState->distributedPlan;
Query *insertSelectQuery = distributedPlan->insertSelectQuery;
Query *insertSelectQuery = distributedPlan->modifyQueryViaCoordinatorOrRepartition;
RangeTblEntry *selectRte = ExtractSelectRangeTableEntry(insertSelectQuery);
/*
@ -244,8 +245,8 @@ NonPushableInsertSelectExplainScan(CustomScanState *node, List *ancestors,
*/
Query *queryCopy = copyObject(selectRte->subquery);
bool repartition = distributedPlan->insertSelectMethod == INSERT_SELECT_REPARTITION;
bool repartition =
distributedPlan->modifyWithSelectMethod == MODIFY_WITH_SELECT_REPARTITION;
if (es->analyze)
{
@ -281,6 +282,67 @@ NonPushableInsertSelectExplainScan(CustomScanState *node, List *ancestors,
}
/*
* NonPushableMergeCommandExplainScan is a custom scan explain callback function
* which is used to print explain information of a Citus plan for MERGE INTO
* distributed_table USING (source query/table), where the source can be any
* query whose results are repartitioned to be colocated with the target table.
*/
void
NonPushableMergeCommandExplainScan(CustomScanState *node, List *ancestors,
struct ExplainState *es)
{
CitusScanState *scanState = (CitusScanState *) node;
DistributedPlan *distributedPlan = scanState->distributedPlan;
Query *mergeQuery = distributedPlan->modifyQueryViaCoordinatorOrRepartition;
RangeTblEntry *sourceRte = ExtractMergeSourceRangeTableEntry(mergeQuery);
/*
* Create a copy because ExplainOneQuery can modify the query, and later
* executions of prepared statements might require it. See
* https://github.com/citusdata/citus/issues/3947 for what can happen.
*/
Query *sourceQueryCopy = copyObject(sourceRte->subquery);
bool repartition =
distributedPlan->modifyWithSelectMethod == MODIFY_WITH_SELECT_REPARTITION;
if (es->analyze)
{
ereport(ERROR, (errmsg("EXPLAIN ANALYZE is currently not supported for "
"MERGE INTO ... commands with repartitioning")));
}
Oid targetRelationId = ModifyQueryResultRelationId(mergeQuery);
StringInfo mergeMethodMessage = makeStringInfo();
appendStringInfo(mergeMethodMessage,
"MERGE INTO %s method", get_rel_name(targetRelationId));
if (repartition)
{
ExplainPropertyText(mergeMethodMessage->data, "repartition", es);
}
else
{
ExplainPropertyText(mergeMethodMessage->data, "pull to coordinator", es);
}
ExplainOpenGroup("Source Query", "Source Query", false, es);
/* explain the MERGE source query */
IntoClause *into = NULL;
ParamListInfo params = NULL;
/*
* With PG14, we need to provide a query string here; for now we pass an
* empty string, which is valid according to Postgres.
*/
char *queryString = pstrdup("");
ExplainOneQuery(sourceQueryCopy, 0, into, es, queryString, params, NULL);
ExplainCloseGroup("Source Query", "Source Query", false, es);
}
/*
* ExplainSubPlans generates EXPLAIN output for subplans for CTEs
* and complex subqueries. Because the planning for these queries
@ -1039,7 +1101,7 @@ worker_save_query_explain_analyze(PG_FUNCTION_ARGS)
TupleDesc tupleDescriptor = NULL;
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
DestReceiver *tupleStoreDest = CreateTuplestoreDestReceiver();
SetTuplestoreDestReceiverParams_compat(tupleStoreDest, tupleStore,
SetTuplestoreDestReceiverParams(tupleStoreDest, tupleStore,
CurrentMemoryContext, false, NULL, NULL);
List *parseTreeList = pg_parse_query(queryString);
@ -1064,15 +1126,9 @@ worker_save_query_explain_analyze(PG_FUNCTION_ARGS)
Query *analyzedQuery = parse_analyze_varparams_compat(parseTree, queryString,
&paramTypes, &numParams, NULL);
#if PG_VERSION_NUM >= PG_VERSION_14
/* pg_rewrite_query is a wrapper around QueryRewrite with some debugging logic */
List *queryList = pg_rewrite_query(analyzedQuery);
#else
/* pg_rewrite_query is not yet public in PostgreSQL 13 */
List *queryList = QueryRewrite(analyzedQuery);
#endif
if (list_length(queryList) != 1)
{
ereport(ERROR, (errmsg("cannot EXPLAIN ANALYZE a query rewritten "

View File

@ -81,8 +81,6 @@ static JoinOrderNode * CartesianProductReferenceJoin(JoinOrderNode *joinNode,
JoinType joinType);
static JoinOrderNode * LocalJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
List *applicableJoinClauses, JoinType joinType);
static bool JoinOnColumns(List *currentPartitionColumnList, Var *candidatePartitionColumn,
List *joinClauseList);
static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode,
TableEntry *candidateTable,
List *applicableJoinClauses,
@ -212,7 +210,7 @@ ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex)
/*
* JoinOnColumns determines whether two columns are joined by a given join clause list.
*/
static bool
bool
JoinOnColumns(List *currentPartitionColumnList, Var *candidateColumn,
List *joinClauseList)
{
@ -1404,7 +1402,7 @@ DistPartitionKeyOrError(Oid relationId)
if (partitionKey == NULL)
{
ereport(ERROR, (errmsg(
"no distribution column found for relation %d, because it is a reference table",
"no distribution column found for relation %d",
relationId)));
}

View File

@ -1855,11 +1855,7 @@ MasterAggregateExpression(Aggref *originalAggregate,
{
/* array_cat_agg() takes anyarray as input */
catAggregateName = ARRAY_CAT_AGGREGATE_NAME;
#if PG_VERSION_NUM >= PG_VERSION_14
catInputType = ANYCOMPATIBLEARRAYOID;
#else
catInputType = ANYARRAYOID;
#endif
}
else if (aggregateType == AGGREGATE_JSONB_AGG ||
aggregateType == AGGREGATE_JSONB_OBJECT_AGG)
@ -1897,8 +1893,6 @@ MasterAggregateExpression(Aggref *originalAggregate,
if (aggregateType == AGGREGATE_ARRAY_AGG)
{
#if PG_VERSION_NUM >= PG_VERSION_14
/*
* Postgres expects the type of the array here such as INT4ARRAYOID.
* Hence we set it to workerReturnType. If we set this to
@ -1906,9 +1900,6 @@ MasterAggregateExpression(Aggref *originalAggregate,
* "argument declared anycompatiblearray is not an array but type anycompatiblearray"
*/
newMasterAggregate->aggargtypes = list_make1_oid(workerReturnType);
#else
newMasterAggregate->aggargtypes = list_make1_oid(ANYARRAYOID);
#endif
}
else
{
@ -2985,7 +2976,7 @@ AppendTargetEntryToGroupClause(TargetEntry *targetEntry,
Expr *targetExpr PG_USED_FOR_ASSERTS_ONLY = targetEntry->expr;
/* we currently only support appending Var target entries */
AssertArg(IsA(targetExpr, Var));
Assert(IsA(targetExpr, Var));
Var *targetColumn = (Var *) targetEntry->expr;
SortGroupClause *groupByClause = CreateSortGroupClause(targetColumn);
@ -3625,7 +3616,7 @@ static Oid
CitusFunctionOidWithSignature(char *functionName, int numargs, Oid *argtypes)
{
List *aggregateName = list_make2(makeString("pg_catalog"), makeString(functionName));
FuncCandidateList clist = FuncnameGetCandidates_compat(aggregateName, numargs, NIL,
FuncCandidateList clist = FuncnameGetCandidates(aggregateName, numargs, NIL,
false, false, false, true);
for (; clist; clist = clist->next)

View File

@ -272,7 +272,7 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
if (!targetListOnPartitionColumn)
{
if (!FindNodeMatchingCheckFunctionInRangeTableList(query->rtable,
IsDistributedTableRTE))
IsTableWithDistKeyRTE))
{
targetListOnPartitionColumn = true;
}
@ -379,6 +379,20 @@ IsReferenceTableRTE(Node *node)
}
/*
* IsTableWithDistKeyRTE gets a node and returns true if the node
* is a range table relation entry that points to a distributed table
* that has a distribution column.
*/
bool
IsTableWithDistKeyRTE(Node *node)
{
Oid relationId = NodeTryGetRteRelid(node);
return relationId != InvalidOid && IsCitusTable(relationId) &&
HasDistributionKey(relationId);
}
/*
* FullCompositeFieldList gets a composite field list, and checks if all fields
* of composite type are used in the list.
@ -1014,7 +1028,8 @@ ErrorHintRequired(const char *errorHint, Query *queryTree)
{
continue;
}
else if (IsCitusTableType(relationId, HASH_DISTRIBUTED))
else if (IsCitusTableType(relationId, HASH_DISTRIBUTED) ||
IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED))
{
int colocationId = TableColocationId(relationId);
colocationIdList = list_append_unique_int(colocationIdList, colocationId);

View File

@ -28,6 +28,7 @@
#include "access/xlog.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@ -69,6 +70,7 @@
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "parser/parse_relation.h"
#include "parser/parse_type.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
@ -79,10 +81,11 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
/* RepartitionJoinBucketCountPerNode determines bucket amount during repartitions */
int RepartitionJoinBucketCountPerNode = 8;
int RepartitionJoinBucketCountPerNode = 4;
/* Policy to use when assigning tasks to worker nodes */
int TaskAssignmentPolicy = TASK_ASSIGNMENT_GREEDY;
@ -231,6 +234,11 @@ static List * FetchEqualityAttrNumsForRTEBoolExpr(BoolExpr *boolExpr);
static List * FetchEqualityAttrNumsForList(List *nodeList);
static int PartitionColumnIndex(Var *targetVar, List *targetList);
static List * GetColumnOriginalIndexes(Oid relationId);
static bool QueryTreeHasImproperForDeparseNodes(Node *inputNode, void *context);
static Node * AdjustImproperForDeparseNodes(Node *inputNode, void *context);
static bool IsImproperForDeparseRelabelTypeNode(Node *inputNode);
static bool IsImproperForDeparseCoerceViaIONode(Node *inputNode);
static CollateExpr * RelabelTypeToCollateExpr(RelabelType *relabelType);
/*
@ -2171,8 +2179,9 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
{
List *sqlTaskList = NIL;
uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */
int shardCount = 0;
bool *taskRequiredForShardIndex = NULL;
int minShardOffset = INT_MAX;
int prevShardCount = 0;
Bitmapset *taskRequiredForShardIndex = NULL;
/* error if shards are not co-partitioned */
ErrorIfUnsupportedShardDistribution(query);
@ -2186,10 +2195,6 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
return NIL;
}
/* defaults to be used if this is a reference table-only query */
int minShardOffset = 0;
int maxShardOffset = 0;
RelationRestriction *relationRestriction = NULL;
List *prunedShardList = NULL;
@ -2205,7 +2210,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
}
/* we expect distributed tables to have the same shard count */
if (shardCount > 0 && shardCount != cacheEntry->shardIntervalArrayLength)
if (prevShardCount > 0 && prevShardCount != cacheEntry->shardIntervalArrayLength)
{
*planningError = DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"shard counts of co-located tables do not "
@ -2213,16 +2218,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
NULL, NULL);
return NIL;
}
if (taskRequiredForShardIndex == NULL)
{
shardCount = cacheEntry->shardIntervalArrayLength;
taskRequiredForShardIndex = (bool *) palloc0(shardCount);
/* there is a distributed table, find the shard range */
minShardOffset = shardCount;
maxShardOffset = -1;
}
prevShardCount = cacheEntry->shardIntervalArrayLength;
/*
* For left joins we don't care about the shards pruned for the right hand side.
@ -2244,32 +2240,26 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
{
int shardIndex = shardInterval->shardIndex;
taskRequiredForShardIndex[shardIndex] = true;
taskRequiredForShardIndex =
bms_add_member(taskRequiredForShardIndex, shardIndex);
minShardOffset = Min(minShardOffset, shardIndex);
maxShardOffset = Max(maxShardOffset, shardIndex);
}
}
/*
* To avoid iterating through all shards indexes we keep the minimum and maximum
* offsets of shards that were not pruned away. This optimisation is primarily
* relevant for queries on range-distributed tables that, due to range filters,
* prune to a small number of adjacent shards.
* We keep track of minShardOffset to skip over a potentially big amount of pruned
* shards. However, we need to start at minShardOffset - 1 to make sure we don't
* miss the first/min shard recorded, as bms_next_member will return the first member
* added after shardOffset; that is, minShardOffset would be the first member we
* expect.
*
* In other cases, such as an OR condition on a hash-distributed table, we may
* still visit most or all shards even if some of them were pruned away. However,
* given that hash-distributed tables typically only have a few shards the
* iteration is still very fast.
* We don't have to keep track of maxShardOffset as the bitmapset will only have been
* allocated till the last shard we have added. Therefore, the iterator will quickly
* identify the end of the bitmapset.
*/
for (int shardOffset = minShardOffset; shardOffset <= maxShardOffset; shardOffset++)
int shardOffset = minShardOffset - 1;
while ((shardOffset = bms_next_member(taskRequiredForShardIndex, shardOffset)) >= 0)
{
if (taskRequiredForShardIndex != NULL && !taskRequiredForShardIndex[shardOffset])
{
/* this shard index is pruned away for all relations */
continue;
}
Task *subqueryTask = QueryPushdownTaskCreate(query, shardOffset,
relationRestrictionContext,
taskIdIndex,
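The loop above relies on bms_next_member() returning the smallest member strictly greater than the offset it is given, which is why the scan starts at minShardOffset - 1. A toy, Postgres-free C sketch of the same pattern follows; the NextRequiredShard helper is a stand-in, not a real API.

#include <stdio.h>

/* stand-in for bms_next_member(): smallest required index > prev, or -1 when exhausted */
static int
NextRequiredShard(const int *required, int count, int prev)
{
	for (int i = (prev < 0 ? 0 : prev + 1); i < count; i++)
	{
		if (required[i])
		{
			return i;
		}
	}
	return -1;
}

int
main(void)
{
	/* shard indexes 3, 4 and 9 survived pruning */
	int required[12] = { 0 };
	required[3] = required[4] = required[9] = 1;
	int minShardOffset = 3;

	/* start one before the minimum so the first surviving shard is visited too */
	int shardOffset = minShardOffset - 1;
	while ((shardOffset = NextRequiredShard(required, 12, shardOffset)) >= 0)
	{
		printf("create task for shard index %d\n", shardOffset);
	}

	return 0;
}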
@ -2359,7 +2349,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
ListCell *relationIdCell = NULL;
uint32 relationIndex = 0;
uint32 rangeDistributedRelationCount = 0;
uint32 hashDistributedRelationCount = 0;
uint32 hashDistOrSingleShardRelCount = 0;
uint32 appendDistributedRelationCount = 0;
foreach(relationIdCell, relationIdList)
@ -2371,9 +2361,10 @@ ErrorIfUnsupportedShardDistribution(Query *query)
nonReferenceRelations = lappend_oid(nonReferenceRelations,
relationId);
}
else if (IsCitusTableType(relationId, HASH_DISTRIBUTED))
else if (IsCitusTableType(relationId, HASH_DISTRIBUTED) ||
IsCitusTableType(relationId, SINGLE_SHARD_DISTRIBUTED))
{
hashDistributedRelationCount++;
hashDistOrSingleShardRelCount++;
nonReferenceRelations = lappend_oid(nonReferenceRelations,
relationId);
}
@ -2388,7 +2379,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
}
}
if ((rangeDistributedRelationCount > 0) && (hashDistributedRelationCount > 0))
if ((rangeDistributedRelationCount > 0) && (hashDistOrSingleShardRelCount > 0))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot push down this subquery"),
@ -2402,7 +2393,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
errdetail("A query including both range and append "
"partitioned relations are unsupported")));
}
else if ((appendDistributedRelationCount > 0) && (hashDistributedRelationCount > 0))
else if ((appendDistributedRelationCount > 0) && (hashDistOrSingleShardRelCount > 0))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot push down this subquery"),
@ -2431,8 +2422,9 @@ ErrorIfUnsupportedShardDistribution(Query *query)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot push down this subquery"),
errdetail("Shards of relations in subquery need to "
"have 1-to-1 shard partitioning")));
errdetail("%s and %s are not colocated",
get_rel_name(firstTableRelationId),
get_rel_name(currentRelationId))));
}
}
}
@ -2487,7 +2479,7 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
/* non-distributed tables have only one shard */
shardInterval = cacheEntry->sortedShardIntervalArray[0];
/* only use reference table as anchor shard if none exists yet */
/* use as anchor shard only if we couldn't find any yet */
if (anchorShardId == INVALID_SHARD_ID)
{
anchorShardId = shardInterval->shardId;
@ -2683,6 +2675,18 @@ SqlTaskList(Job *job)
List *fragmentCombinationList = FragmentCombinationList(rangeTableFragmentsList,
jobQuery, dependentJobList);
/*
* Adjust RelabelType and CoerceViaIO nodes that are improper for deparsing.
* We first check if there are any such nodes by using a query tree walker.
* The reason is that a query tree mutator will create a deep copy of all
* the query sublinks, and we don't want to do that unless necessary, as it
* would be inefficient.
*/
if (QueryTreeHasImproperForDeparseNodes((Node *) jobQuery, NULL))
{
jobQuery = (Query *) AdjustImproperForDeparseNodes((Node *) jobQuery, NULL);
}
ListCell *fragmentCombinationCell = NULL;
foreach(fragmentCombinationCell, fragmentCombinationList)
{
@ -2733,7 +2737,7 @@ SqlTaskList(Job *job)
* RelabelTypeToCollateExpr converts RelabelType's into CollationExpr's.
* With that, we will be able to pushdown COLLATE's.
*/
CollateExpr *
static CollateExpr *
RelabelTypeToCollateExpr(RelabelType *relabelType)
{
Assert(OidIsValid(relabelType->resultcollid));
@ -2793,15 +2797,15 @@ AnchorRangeTableId(List *rangeTableList)
* have the most number of shards, we have a draw.
*/
List *baseTableIdList = BaseRangeTableIdList(rangeTableList);
List *anchorTableIdList = AnchorRangeTableIdList(rangeTableList, baseTableIdList);
List *anchorTableRTIList = AnchorRangeTableIdList(rangeTableList, baseTableIdList);
ListCell *anchorTableIdCell = NULL;
int anchorTableIdCount = list_length(anchorTableIdList);
int anchorTableIdCount = list_length(anchorTableRTIList);
Assert(anchorTableIdCount > 0);
if (anchorTableIdCount == 1)
{
anchorRangeTableId = (uint32) linitial_int(anchorTableIdList);
anchorRangeTableId = (uint32) linitial_int(anchorTableRTIList);
return anchorRangeTableId;
}
@ -2809,7 +2813,7 @@ AnchorRangeTableId(List *rangeTableList)
* If more than one table has the most number of shards, we break the draw
* by comparing table sizes and picking the table with the largest size.
*/
foreach(anchorTableIdCell, anchorTableIdList)
foreach(anchorTableIdCell, anchorTableRTIList)
{
uint32 anchorTableId = (uint32) lfirst_int(anchorTableIdCell);
RangeTblEntry *tableEntry = rt_fetch(anchorTableId, rangeTableList);
@ -2837,7 +2841,7 @@ AnchorRangeTableId(List *rangeTableList)
if (anchorRangeTableId == 0)
{
/* all tables have the same shard count and size 0, pick the first */
anchorRangeTableId = (uint32) linitial_int(anchorTableIdList);
anchorRangeTableId = (uint32) linitial_int(anchorTableRTIList);
}
return anchorRangeTableId;
@ -2878,7 +2882,7 @@ BaseRangeTableIdList(List *rangeTableList)
static List *
AnchorRangeTableIdList(List *rangeTableList, List *baseRangeTableIdList)
{
List *anchorTableIdList = NIL;
List *anchorTableRTIList = NIL;
uint32 maxShardCount = 0;
ListCell *baseRangeTableIdCell = NULL;
@ -2888,25 +2892,46 @@ AnchorRangeTableIdList(List *rangeTableList, List *baseRangeTableIdList)
return baseRangeTableIdList;
}
uint32 referenceTableRTI = 0;
foreach(baseRangeTableIdCell, baseRangeTableIdList)
{
uint32 baseRangeTableId = (uint32) lfirst_int(baseRangeTableIdCell);
RangeTblEntry *tableEntry = rt_fetch(baseRangeTableId, rangeTableList);
List *shardList = LoadShardList(tableEntry->relid);
Oid citusTableId = tableEntry->relid;
if (IsCitusTableType(citusTableId, REFERENCE_TABLE))
{
referenceTableRTI = baseRangeTableId;
continue;
}
List *shardList = LoadShardList(citusTableId);
uint32 shardCount = (uint32) list_length(shardList);
if (shardCount > maxShardCount)
{
anchorTableIdList = list_make1_int(baseRangeTableId);
anchorTableRTIList = list_make1_int(baseRangeTableId);
maxShardCount = shardCount;
}
else if (shardCount == maxShardCount)
{
anchorTableIdList = lappend_int(anchorTableIdList, baseRangeTableId);
anchorTableRTIList = lappend_int(anchorTableRTIList, baseRangeTableId);
}
}
return anchorTableIdList;
/*
* We favor distributed tables over reference tables as anchor tables. But
* in case we cannot find any distributed tables, we let a reference table be
* the anchor table. For now, we cannot see a query that might require this, but
* we want to be backward compatible.
*/
if (list_length(anchorTableRTIList) == 0)
{
return referenceTableRTI > 0 ? list_make1_int(referenceTableRTI) : NIL;
}
return anchorTableRTIList;
}
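A simplified standalone sketch (plain C; the struct and helper names are hypothetical) of the anchor selection above: non-reference tables with the highest shard count win, and a reference table is only used as the anchor when nothing else exists. The real code also keeps ties and breaks them by table size, which this sketch skips.

#include <stdio.h>

typedef struct CandidateTable
{
	int rangeTableIndex;
	int shardCount;
	int isReferenceTable;
} CandidateTable;

static int
PickAnchorRangeTableIndex(const CandidateTable *tables, int count)
{
	int bestIndex = -1;
	int bestShardCount = 0;
	int referenceTableIndex = -1;

	for (int i = 0; i < count; i++)
	{
		if (tables[i].isReferenceTable)
		{
			referenceTableIndex = tables[i].rangeTableIndex;
			continue;
		}
		if (tables[i].shardCount > bestShardCount)
		{
			bestShardCount = tables[i].shardCount;
			bestIndex = tables[i].rangeTableIndex;
		}
	}

	/* fall back to a reference table only when no distributed table was seen */
	return (bestIndex >= 0) ? bestIndex : referenceTableIndex;
}

int
main(void)
{
	CandidateTable tables[] = {
		{ 1, 1, 1 },   /* reference table */
		{ 2, 32, 0 },  /* hash-distributed, 32 shards */
		{ 3, 1, 0 },   /* single-shard distributed table */
	};

	printf("anchor RTE: %d\n", PickAnchorRangeTableIndex(tables, 3));
	return 0;
}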
@ -5593,3 +5618,126 @@ TaskListHighestTaskId(List *taskList)
return highestTaskId;
}
/*
* QueryTreeHasImproperForDeparseNodes walks over the node,
* and returns true if there are RelabelType or
* CoerceViaIONodes which are improper for deparse
*/
static bool
QueryTreeHasImproperForDeparseNodes(Node *inputNode, void *context)
{
if (inputNode == NULL)
{
return false;
}
else if (IsImproperForDeparseRelabelTypeNode(inputNode) ||
IsImproperForDeparseCoerceViaIONode(inputNode))
{
return true;
}
else if (IsA(inputNode, Query))
{
return query_tree_walker((Query *) inputNode,
QueryTreeHasImproperForDeparseNodes,
NULL, 0);
}
return expression_tree_walker(inputNode,
QueryTreeHasImproperForDeparseNodes,
NULL);
}
/*
* AdjustImproperForDeparseNodes takes an input rewritten query and modifies
* nodes which, after going through our planner, pose a problem when
* deparsing. So far we have two such types of nodes that may pose problems:
* RelabelType and CoerceViaIO nodes.
* Details are given in the comments in the corresponding if conditions.
*/
static Node *
AdjustImproperForDeparseNodes(Node *inputNode, void *context)
{
if (inputNode == NULL)
{
return NULL;
}
if (IsImproperForDeparseRelabelTypeNode(inputNode))
{
/*
* The planner converts CollateExpr to RelabelType
* and here we convert back.
*/
return (Node *) RelabelTypeToCollateExpr((RelabelType *) inputNode);
}
else if (IsImproperForDeparseCoerceViaIONode(inputNode))
{
/*
* The planner converts some ::text/::varchar casts to ::cstring
* and here we convert back to text because cstring is a pseudotype
* and it cannot be cast to most result types
*/
CoerceViaIO *iocoerce = (CoerceViaIO *) inputNode;
Node *arg = (Node *) iocoerce->arg;
Const *cstringToText = (Const *) arg;
cstringToText->consttype = TEXTOID;
cstringToText->constlen = -1;
Type textType = typeidType(TEXTOID);
char *constvalue = NULL;
if (!cstringToText->constisnull)
{
constvalue = DatumGetCString(cstringToText->constvalue);
}
cstringToText->constvalue = stringTypeDatum(textType,
constvalue,
cstringToText->consttypmod);
ReleaseSysCache(textType);
return inputNode;
}
else if (IsA(inputNode, Query))
{
return (Node *) query_tree_mutator((Query *) inputNode,
AdjustImproperForDeparseNodes,
NULL, QTW_DONT_COPY_QUERY);
}
return expression_tree_mutator(inputNode, AdjustImproperForDeparseNodes, NULL);
}
/*
* Checks if the given node is of Relabel type which is improper for deparsing
* The planner converts some CollateExpr to RelabelType nodes, and we need
* to find these nodes. They would be improperly deparsed without the
* "COLLATE" expression.
*/
static bool
IsImproperForDeparseRelabelTypeNode(Node *inputNode)
{
return (IsA(inputNode, RelabelType) &&
OidIsValid(((RelabelType *) inputNode)->resultcollid) &&
((RelabelType *) inputNode)->resultcollid != DEFAULT_COLLATION_OID);
}
/*
* Checks if the given node is of CoerceViaIO type which is improper for deparsing
* The planner converts some ::text/::varchar casts to ::cstring, and we need
* to find these nodes. They would be improperly deparsed with "cstring" which cannot
* be casted to most resulttypes.
*/
static bool
IsImproperForDeparseCoerceViaIONode(Node *inputNode)
{
return (IsA(inputNode, CoerceViaIO) &&
IsA(((CoerceViaIO *) inputNode)->arg, Const) &&
((Const *) ((CoerceViaIO *) inputNode)->arg)->consttype == CSTRINGOID);
}

View File

@ -152,10 +152,8 @@ static List * ExtractInsertValuesList(Query *query, Var *partitionColumn);
static DeferredErrorMessage * DeferErrorIfUnsupportedRouterPlannableSelectQuery(
Query *query);
static DeferredErrorMessage * ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree);
#if PG_VERSION_NUM >= PG_VERSION_14
static DeferredErrorMessage * ErrorIfQueryHasCTEWithSearchClause(Query *queryTree);
static bool ContainsSearchClauseWalker(Node *node);
#endif
static bool ContainsSearchClauseWalker(Node *node, void *context);
static bool SelectsFromDistributedTable(List *rangeTableList, Query *query);
static ShardPlacement * CreateDummyPlacement(bool hasLocalRelation);
static ShardPlacement * CreateLocalDummyPlacement();
@ -388,6 +386,26 @@ AddPartitionKeyNotNullFilterToSelect(Query *subqery)
}
/*
* ExtractSourceResultRangeTableEntry is a generic wrapper for modification commands
* that use results as input, based on a source query.
*/
RangeTblEntry *
ExtractSourceResultRangeTableEntry(Query *query)
{
if (IsMergeQuery(query))
{
return ExtractMergeSourceRangeTableEntry(query);
}
else if (CheckInsertSelectQuery(query))
{
return ExtractSelectRangeTableEntry(query);
}
return NULL;
}
/*
* ExtractSelectRangeTableEntry returns the range table entry of the subquery.
* Note that the function expects and asserts that the input query be
@ -1098,14 +1116,12 @@ ModifyQuerySupported(Query *queryTree, Query *originalQuery, bool multiShardQuer
}
}
#if PG_VERSION_NUM >= PG_VERSION_14
DeferredErrorMessage *CTEWithSearchClauseError =
ErrorIfQueryHasCTEWithSearchClause(originalQuery);
if (CTEWithSearchClauseError != NULL)
{
return CTEWithSearchClauseError;
}
#endif
return NULL;
}
@ -1862,21 +1878,9 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon
}
if (*planningError)
{
/*
* For MERGE, we do _not_ plan any other router job than the MERGE job itself,
* let's not continue further down the lane in distributed planning, simply
* bail out.
*/
if (IsMergeQuery(originalQuery))
{
RaiseDeferredError(*planningError, ERROR);
}
else
{
return NULL;
}
}
Job *job = CreateJob(originalQuery);
job->partitionKeyValue = partitionKeyValue;
@ -1885,19 +1889,38 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon
{
RangeTblEntry *updateOrDeleteOrMergeRTE = ExtractResultRelationRTE(originalQuery);
/*
* If all of the shards are pruned, we replace the relation RTE into
* subquery RTE that returns no results. However, this is not useful
* for UPDATE and DELETE queries. Therefore, if we detect a UPDATE or
* DELETE RTE with subquery type, we just set task list to empty and return
* the job.
*/
if (updateOrDeleteOrMergeRTE->rtekind == RTE_SUBQUERY)
{
/*
* Not generating tasks for MERGE target relation might
* result in incorrect behavior as source rows with NOT
* MATCHED clause might qualify for insertion.
*/
if (IsMergeQuery(originalQuery))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Merge command is currently "
"unsupported with filters that "
"prunes down to zero shards"),
errhint("Avoid `WHERE false` clause or "
"any equivalent filters that "
"could prune down to zero shards")));
}
else
{
/*
* If all of the shards are pruned, we replace the
* relation RTE with a subquery RTE that returns no
* results. However, this is not useful for UPDATE
* and DELETE queries. Therefore, if we detect an
* UPDATE or DELETE RTE with subquery type, we just
* set the task list to empty and return the job.
*/
job->taskList = NIL;
return job;
}
}
}
if (isMultiShardModifyQuery)
{
@ -2246,10 +2269,8 @@ SelectsFromDistributedTable(List *rangeTableList, Query *query)
}
static bool ContainsOnlyLocalTables(RTEListProperties *rteProperties);
/*
* RouterQuery runs router pruning logic for SELECT, UPDATE and DELETE queries.
* RouterQuery runs router pruning logic for SELECT, UPDATE, DELETE, and MERGE queries.
* If there are shards present and query is routable, all RTEs have been updated
* to point to the relevant shards in the originalQuery. Also, placementList is
* filled with the list of worker nodes that has all the required shard placements
@ -2282,6 +2303,7 @@ PlanRouterQuery(Query *originalQuery,
DeferredErrorMessage *planningError = NULL;
bool shardsPresent = false;
CmdType commandType = originalQuery->commandType;
Oid targetRelationId = InvalidOid;
bool fastPathRouterQuery =
plannerRestrictionContext->fastPathRestrictionContext->fastPathRouterQuery;
@ -2348,13 +2370,7 @@ PlanRouterQuery(Query *originalQuery,
Assert(UpdateOrDeleteOrMergeQuery(originalQuery));
if (IsMergeQuery(originalQuery))
{
planningError = MergeQuerySupported(originalQuery,
isMultiShardQuery,
plannerRestrictionContext);
}
else
if (!IsMergeQuery(originalQuery))
{
planningError = ModifyQuerySupported(originalQuery, originalQuery,
isMultiShardQuery,
@ -2403,13 +2419,14 @@ PlanRouterQuery(Query *originalQuery,
/* both Postgres tables and materialized views are locally available */
RTEListProperties *rteProperties = GetRTEListPropertiesForQuery(originalQuery);
if (shardId == INVALID_SHARD_ID && ContainsOnlyLocalTables(rteProperties))
if (isLocalTableModification)
{
if (commandType != CMD_SELECT)
{
*isLocalTableModification = true;
}
*isLocalTableModification =
IsLocalTableModification(targetRelationId, originalQuery, shardId,
rteProperties);
}
bool hasPostgresLocalRelation =
rteProperties->hasPostgresLocalTable || rteProperties->hasMaterializedView;
List *taskPlacementList =
@ -2447,7 +2464,7 @@ PlanRouterQuery(Query *originalQuery,
* ContainsOnlyLocalTables returns true if there is only
* local tables and not any distributed or reference table.
*/
static bool
bool
ContainsOnlyLocalTables(RTEListProperties *rteProperties)
{
return !rteProperties->hasDistributedTable && !rteProperties->hasReferenceTable;
@ -2683,7 +2700,7 @@ TargetShardIntervalForFastPathQuery(Query *query, bool *isMultiShardQuery,
if (!HasDistributionKey(relationId))
{
/* we don't need to do shard pruning for non-distributed tables */
/* we don't need to do shard pruning for single shard tables */
return list_make1(LoadShardIntervalList(relationId));
}
@ -2973,7 +2990,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
Assert(query->commandType == CMD_INSERT);
/* reference tables and citus local tables can only have one shard */
/* tables that don't have distribution column can only have one shard */
if (!HasDistributionKeyCacheEntry(cacheEntry))
{
List *shardIntervalList = LoadShardIntervalList(distributedTableId);
@ -2991,6 +3008,12 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
ereport(ERROR, (errmsg("local table cannot have %d shards",
shardCount)));
}
else if (IsCitusTableTypeCacheEntry(cacheEntry, SINGLE_SHARD_DISTRIBUTED))
{
ereport(ERROR, (errmsg("distributed tables having a null shard key "
"cannot have %d shards",
shardCount)));
}
}
ShardInterval *shardInterval = linitial(shardIntervalList);
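A brief sketch of the table kind this new branch refers to, assuming the Citus 12 convention of passing NULL as the distribution column; the names are illustrative.

CREATE TABLE settings (key text, value text);
SELECT create_distributed_table('settings', NULL);  -- single-shard distributed table, no shard key
INSERT INTO settings VALUES ('theme', 'dark');       -- always routes to that single shard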
@ -3731,14 +3754,12 @@ DeferErrorIfUnsupportedRouterPlannableSelectQuery(Query *query)
NULL, NULL);
}
#if PG_VERSION_NUM >= PG_VERSION_14
DeferredErrorMessage *CTEWithSearchClauseError =
ErrorIfQueryHasCTEWithSearchClause(query);
if (CTEWithSearchClauseError != NULL)
{
return CTEWithSearchClauseError;
}
#endif
return ErrorIfQueryHasUnroutableModifyingCTE(query);
}
@ -3848,7 +3869,8 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree)
CitusTableCacheEntry *modificationTableCacheEntry =
GetCitusTableCacheEntry(distributedTableId);
if (!HasDistributionKeyCacheEntry(modificationTableCacheEntry))
if (!IsCitusTableTypeCacheEntry(modificationTableCacheEntry,
DISTRIBUTED_TABLE))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot router plan modification of a non-distributed table",
@ -3872,8 +3894,6 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree)
}
#if PG_VERSION_NUM >= PG_VERSION_14
/*
* ErrorIfQueryHasCTEWithSearchClause checks if the query contains any common table
* expressions with search clause and errors out if it does.
@ -3881,7 +3901,7 @@ ErrorIfQueryHasUnroutableModifyingCTE(Query *queryTree)
static DeferredErrorMessage *
ErrorIfQueryHasCTEWithSearchClause(Query *queryTree)
{
if (ContainsSearchClauseWalker((Node *) queryTree))
if (ContainsSearchClauseWalker((Node *) queryTree, NULL))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"CTEs with search clauses are not supported",
@ -3896,7 +3916,7 @@ ErrorIfQueryHasCTEWithSearchClause(Query *queryTree)
* CommonTableExprs with search clause
*/
static bool
ContainsSearchClauseWalker(Node *node)
ContainsSearchClauseWalker(Node *node, void *context)
{
if (node == NULL)
{
@ -3920,9 +3940,6 @@ ContainsSearchClauseWalker(Node *node)
}
#endif
/*
* get_all_actual_clauses
*

View File

@ -168,11 +168,10 @@ AnchorRte(Query *subquery)
{
Oid relationId = currentRte->relid;
if (IsCitusTable(relationId) && !HasDistributionKey(relationId))
if (!IsCitusTableType(relationId, DISTRIBUTED_TABLE))
{
/*
* Non-distributed tables should not be the anchor rte since they
* don't have distribution key.
* We're not interested in non-distributed relations.
*/
continue;
}

View File

@ -188,7 +188,6 @@ static Query * BuildReadIntermediateResultsQuery(List *targetEntryList,
List *columnAliasList,
Const *resultIdConst, Oid functionOid,
bool useBinaryCopyFormat);
static void UpdateVarNosInNode(Node *node, Index newVarNo);
static Query * CreateOuterSubquery(RangeTblEntry *rangeTableEntry,
List *outerSubqueryTargetList);
static List * GenerateRequiredColNamesFromTargetList(List *targetList);
@ -1891,7 +1890,7 @@ GenerateRequiredColNamesFromTargetList(List *targetList)
* UpdateVarNosInNode iterates the Vars in the
* given node and updates the varno's as the newVarNo.
*/
static void
void
UpdateVarNosInNode(Node *node, Index newVarNo)
{
List *varList = pull_var_clause(node, PVC_RECURSE_AGGREGATES |

View File

@ -155,6 +155,7 @@ static bool AllDistributedRelationsInRestrictionContextColocated(
RelationRestrictionContext *
restrictionContext);
static bool IsNotSafeRestrictionToRecursivelyPlan(Node *node);
static bool HasPlaceHolderVar(Node *node);
static JoinRestrictionContext * FilterJoinRestrictionContext(
JoinRestrictionContext *joinRestrictionContext, Relids
queryRteIdentities);
@ -2142,13 +2143,24 @@ GetRestrictInfoListForRelation(RangeTblEntry *rangeTblEntry,
* If the restriction involves multiple tables, we cannot add it to
* input relation's expression list.
*/
Relids varnos = pull_varnos_compat(relationRestriction->plannerInfo,
Relids varnos = pull_varnos(relationRestriction->plannerInfo,
(Node *) restrictionClause);
if (bms_num_members(varnos) != 1)
{
continue;
}
/*
* A PlaceHolderVar is not relevant when processing a restriction clause;
* otherwise, pull_var_clause_default would throw an error. Postgres creates
* the restriction on the physical Var that the PlaceHolderVar points to
* anyway, so it is safe to skip this restriction.
*/
if (FindNodeMatchingCheckFunction((Node *) restrictionClause, HasPlaceHolderVar))
{
continue;
}
/*
* We're going to add this restriction expression to a subquery
* which consists of only one relation in its jointree. Thus,
@ -2214,6 +2226,16 @@ IsNotSafeRestrictionToRecursivelyPlan(Node *node)
}
/*
* HasPlaceHolderVar returns true if the given node contains any PlaceHolderVar.
*/
static bool
HasPlaceHolderVar(Node *node)
{
return IsA(node, PlaceHolderVar);
}
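A rough sketch of a query shape that can introduce a PlaceHolderVar into the restriction list: an expression computed on the nullable side of an outer join and filtered above the join. The table names are hypothetical, and whether the planner actually materializes a PlaceHolderVar depends on the plan.

SELECT *
FROM t1
LEFT JOIN (SELECT id, COALESCE(val, 0) AS val FROM t2) s ON t1.id = s.id
WHERE s.val = 0;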
/*
* FilterRelationRestrictionContext gets a relation restriction context and
* set of rte identities. It returns the relation restrictions that appear

View File

@ -1536,7 +1536,7 @@ CreateSubscriptions(MultiConnection *sourceConnection,
quote_identifier(target->publication->name),
quote_identifier(target->replicationSlot->name));
if (EnableBinaryProtocol && PG_VERSION_NUM >= PG_VERSION_14)
if (EnableBinaryProtocol)
{
appendStringInfoString(createSubscriptionCommand, ", binary=true)");
}
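For reference, a simplified sketch of the kind of subscription command this code assembles during logical shard moves; the object names and connection string are placeholders, and only the trailing binary = true reflects the change above.

CREATE SUBSCRIPTION citus_shard_move_subscription
    CONNECTION 'host=worker-2 port=5432 dbname=postgres'
    PUBLICATION citus_shard_move_publication
    WITH (create_slot = false, copy_data = false, enabled = false, binary = true);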

View File

@ -44,7 +44,7 @@
#include "distributed/cte_inline.h"
#include "distributed/distributed_deadlock_detection.h"
#include "distributed/errormessage.h"
#include "distributed/insert_select_executor.h"
#include "distributed/repartition_executor.h"
#include "distributed/intermediate_result_pruning.h"
#include "distributed/local_multi_copy.h"
#include "distributed/local_executor.h"
@ -185,13 +185,14 @@ static void CitusObjectAccessHook(ObjectAccessType access, Oid classId, Oid obje
static void DoInitialCleanup(void);
static void ResizeStackToMaximumDepth(void);
static void multi_log_hook(ErrorData *edata);
static bool IsSequenceOverflowError(ErrorData *edata);
static void RegisterConnectionCleanup(void);
static void RegisterExternalClientBackendCounterDecrement(void);
static void CitusCleanupConnectionsAtExit(int code, Datum arg);
static void DecrementExternalClientBackendCounterAtExit(int code, Datum arg);
static void CreateRequiredDirectories(void);
static void RegisterCitusConfigVariables(void);
static void OverridePostgresConfigAssignHooks(void);
static void OverridePostgresConfigProperties(void);
static bool ErrorIfNotASuitableDeadlockFactor(double *newval, void **extra,
GucSource source);
static bool WarnIfDeprecatedExecutorUsed(int *newval, void **extra, GucSource source);
@ -213,6 +214,7 @@ static bool IsSuperuser(char *userName);
static void AdjustDynamicLibraryPathForCdcDecoders(void);
static ClientAuthentication_hook_type original_client_auth_hook = NULL;
static emit_log_hook_type original_emit_log_hook = NULL;
/* *INDENT-OFF* */
/* GUC enum definitions */
@ -458,6 +460,7 @@ _PG_init(void)
ExecutorEnd_hook = CitusAttributeToEnd;
/* register hook for error messages */
original_emit_log_hook = emit_log_hook;
emit_log_hook = multi_log_hook;
@ -681,6 +684,15 @@ multi_log_hook(ErrorData *edata)
* Show the user a meaningful error message when a backend is cancelled
* by the distributed deadlock detection. Also reset the state for this,
* since the next cancelation of the backend might have another reason.
*
* We also want to provide a useful hint for sequence overflow errors
* because they're likely to be caused by the way Citus handles smallint/int
* based sequences on worker nodes. Note that we add the hint without checking
* whether we're on a worker node or whether the sequence was used on a distributed
* table, because the catalog might not be available at this point. And given
* that this hint might be shown for regular Postgres tables too, we inject
* the hint only when EnableUnsupportedFeatureMessages is set to true.
* Otherwise, vanilla tests would fail.
*/
bool clearState = true;
if (edata->elevel == ERROR && edata->sqlerrcode == ERRCODE_QUERY_CANCELED &&
@ -698,6 +710,40 @@ multi_log_hook(ErrorData *edata)
edata->message = pstrdup("canceling the transaction since it was "
"involved in a distributed deadlock");
}
else if (EnableUnsupportedFeatureMessages &&
IsSequenceOverflowError(edata))
{
edata->detail = pstrdup("nextval(sequence) calls in worker nodes "
"are not supported for column defaults of "
"type int or smallint");
edata->hint = pstrdup("If the command was issued from a worker node, "
"try issuing it from the coordinator node "
"instead.");
}
if (original_emit_log_hook)
{
original_emit_log_hook(edata);
}
}
/*
* IsSequenceOverflowError returns true if the given error is a sequence
* overflow error.
*/
static bool
IsSequenceOverflowError(ErrorData *edata)
{
static const char *sequenceOverflowedMsgPrefix =
"nextval: reached maximum value of sequence";
static const int sequenceOverflowedMsgPrefixLen = 42;
return edata->elevel == ERROR &&
edata->sqlerrcode == ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED &&
edata->message != NULL &&
strncmp(edata->message, sequenceOverflowedMsgPrefix,
sequenceOverflowedMsgPrefixLen) == 0;
}
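A sketch of the scenario the new hint targets, assuming an int-backed sequence default on a distributed table (names are illustrative). When nextval() runs on a worker, the node-specific sequence offset can push an int or smallint sequence past its maximum almost immediately.

CREATE TABLE events (id serial, payload text);        -- serial implies an int sequence
SELECT create_distributed_table('events', 'payload');
-- issued from a worker node, this insert may now fail with the detail and
-- hint attached above instead of a bare sequence-overflow error
INSERT INTO events (payload) VALUES ('from a worker');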
@ -878,7 +924,7 @@ RegisterCitusConfigVariables(void)
&AllowModificationsFromWorkersToReplicatedTables,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -895,7 +941,7 @@ RegisterCitusConfigVariables(void)
&AllowNestedDistributedExecution,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -915,7 +961,7 @@ RegisterCitusConfigVariables(void)
&AllowUnsafeConstraints,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -935,7 +981,7 @@ RegisterCitusConfigVariables(void)
&EnableAcquiringUnsafeLockFromWorkers,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -956,7 +1002,7 @@ RegisterCitusConfigVariables(void)
&CheckAvailableSpaceBeforeMove,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomStringVariable(
@ -995,7 +1041,7 @@ RegisterCitusConfigVariables(void)
&CopySwitchOverThresholdBytes,
4 * 1024 * 1024, 1, INT_MAX,
PGC_USERSET,
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL,
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomRealVariable(
@ -1066,7 +1112,7 @@ RegisterCitusConfigVariables(void)
&CreateObjectPropagationMode,
CREATE_OBJECT_PROPAGATION_IMMEDIATE, create_object_propagation_options,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1139,7 +1185,7 @@ RegisterCitusConfigVariables(void)
&EnableAlterDatabaseOwner,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1150,7 +1196,7 @@ RegisterCitusConfigVariables(void)
&EnableAlterRolePropagation,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1160,7 +1206,7 @@ RegisterCitusConfigVariables(void)
&EnableAlterRoleSetPropagation,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1169,11 +1215,7 @@ RegisterCitusConfigVariables(void)
"Enables communication between nodes using binary protocol when possible"),
NULL,
&EnableBinaryProtocol,
#if PG_VERSION_NUM >= PG_VERSION_14
true,
#else
false,
#endif
PGC_USERSET,
GUC_STANDARD,
NULL, NULL, NULL);
@ -1199,7 +1241,7 @@ RegisterCitusConfigVariables(void)
&EnableClusterClock,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"citus.enable_cost_based_connection_establishment",
@ -1210,7 +1252,7 @@ RegisterCitusConfigVariables(void)
&EnableCostBasedConnectionEstablishment,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1231,7 +1273,7 @@ RegisterCitusConfigVariables(void)
&EnableCreateTypePropagation,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1241,7 +1283,7 @@ RegisterCitusConfigVariables(void)
&EnableDDLPropagation,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1266,7 +1308,7 @@ RegisterCitusConfigVariables(void)
&EnableFastPathRouterPlanner,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1300,7 +1342,7 @@ RegisterCitusConfigVariables(void)
&EnableManualChangesToShards,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomStringVariable(
@ -1311,7 +1353,7 @@ RegisterCitusConfigVariables(void)
&EnableManualMetadataChangesForUser,
"",
PGC_SIGHUP,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1321,7 +1363,7 @@ RegisterCitusConfigVariables(void)
&EnableMetadataSync,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1339,9 +1381,9 @@ RegisterCitusConfigVariables(void)
"or altering the shard count of one of those distributed "
"tables."),
&EnableNonColocatedRouterQueryPushdown,
true,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
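With the default flipped to off, a router query touching two distributed tables from different colocation groups is no longer pushed down as a single task out of the box. A hedged sketch, with hypothetical non-colocated tables t1 and t2:

SET citus.enable_non_colocated_router_query_pushdown TO off;  -- the new default
SELECT * FROM t1 JOIN t2 USING (id) WHERE id = 42;            -- no longer assumes both shards sit on the same node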
DefineCustomBoolVariable(
@ -1361,7 +1403,7 @@ RegisterCitusConfigVariables(void)
&EnableRepartitionedInsertSelect,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1371,7 +1413,21 @@ RegisterCitusConfigVariables(void)
&EnableRouterExecution,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"citus.enable_schema_based_sharding",
gettext_noop("Enables schema based sharding."),
gettext_noop("The schemas created while this is ON will be automatically "
"associated with individual colocation groups such that the "
"tables created in those schemas will be automatically "
"converted to colocated distributed tables without a shard "
"key."),
&EnableSchemaBasedSharding,
false,
PGC_USERSET,
GUC_STANDARD,
NULL, NULL, NULL);
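A minimal usage sketch for the new GUC; the schema and table names are hypothetical.

SET citus.enable_schema_based_sharding TO on;
CREATE SCHEMA tenant_42;                                   -- registered as a tenant schema
CREATE TABLE tenant_42.orders (id bigint, total numeric);  -- becomes a colocated distributed table without a shard key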
DefineCustomBoolVariable(
@ -1382,7 +1438,7 @@ RegisterCitusConfigVariables(void)
&EnableSingleHashRepartitioning,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1393,7 +1449,7 @@ RegisterCitusConfigVariables(void)
"and operating system name. This configuration value controls "
"whether these reports are sent."),
&EnableStatisticsCollection,
#if defined(HAVE_LIBCURL) && defined(ENABLE_CITUS_STATISTICS_COLLECTION)
#if defined(HAVE_LIBCURL)
true,
#else
false,
@ -1412,7 +1468,7 @@ RegisterCitusConfigVariables(void)
&EnableUniqueJobIds,
true,
PGC_USERSET,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1423,7 +1479,7 @@ RegisterCitusConfigVariables(void)
&EnableUnsafeTriggers,
false,
PGC_USERSET,
GUC_STANDARD | GUC_NO_SHOW_ALL,
GUC_STANDARD | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1434,7 +1490,7 @@ RegisterCitusConfigVariables(void)
&EnableUnsupportedFeatureMessages,
true,
PGC_SUSET,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1444,7 +1500,7 @@ RegisterCitusConfigVariables(void)
&EnableVersionChecks,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1457,7 +1513,7 @@ RegisterCitusConfigVariables(void)
&EnforceForeignKeyRestrictions,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1468,7 +1524,7 @@ RegisterCitusConfigVariables(void)
&EnforceLocalObjectRestrictions,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -1484,7 +1540,7 @@ RegisterCitusConfigVariables(void)
&ExecutorSlowStartInterval,
10, 0, INT_MAX,
PGC_USERSET,
GUC_UNIT_MS | GUC_NO_SHOW_ALL,
GUC_UNIT_MS | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1524,7 +1580,7 @@ RegisterCitusConfigVariables(void)
&ExplainDistributedQueries,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1540,7 +1596,7 @@ RegisterCitusConfigVariables(void)
&ForceMaxQueryParallelization,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1554,7 +1610,7 @@ RegisterCitusConfigVariables(void)
&FunctionOpensTransactionBlock,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomStringVariable(
@ -1566,7 +1622,7 @@ RegisterCitusConfigVariables(void)
&GrepRemoteCommands,
"",
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1578,7 +1634,7 @@ RegisterCitusConfigVariables(void)
&HideCitusDependentObjects,
false,
PGC_USERSET,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
/*
@ -1598,7 +1654,7 @@ RegisterCitusConfigVariables(void)
&DeprecatedEmptyString,
"",
PGC_SUSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -1608,7 +1664,7 @@ RegisterCitusConfigVariables(void)
&IsolationTestSessionProcessID,
-1, -1, INT_MAX,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -1618,7 +1674,7 @@ RegisterCitusConfigVariables(void)
&IsolationTestSessionRemoteProcessID,
-1, -1, INT_MAX,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -1643,7 +1699,7 @@ RegisterCitusConfigVariables(void)
&LocalCopyFlushThresholdByte,
512 * 1024, 1, INT_MAX,
PGC_USERSET,
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL,
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomStringVariable(
@ -1700,7 +1756,7 @@ RegisterCitusConfigVariables(void)
&LogDistributedDeadlockDetection,
false,
PGC_SIGHUP,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1710,7 +1766,7 @@ RegisterCitusConfigVariables(void)
&LogIntermediateResults,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1721,7 +1777,7 @@ RegisterCitusConfigVariables(void)
&LogLocalCommands,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1732,7 +1788,7 @@ RegisterCitusConfigVariables(void)
&LogMultiJoinOrder,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -1754,7 +1810,7 @@ RegisterCitusConfigVariables(void)
&LogicalReplicationTimeout,
2 * 60 * 60 * 1000, 0, 7 * 24 * 3600 * 1000,
PGC_SIGHUP,
GUC_NO_SHOW_ALL | GUC_UNIT_MS,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE | GUC_UNIT_MS,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -1889,7 +1945,7 @@ RegisterCitusConfigVariables(void)
&MaxRebalancerLoggedIgnoredMoves,
5, -1, INT_MAX,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -1934,7 +1990,7 @@ RegisterCitusConfigVariables(void)
&MetadataSyncInterval,
60 * MS_PER_SECOND, 1, 7 * MS_PER_DAY,
PGC_SIGHUP,
GUC_UNIT_MS | GUC_NO_SHOW_ALL,
GUC_UNIT_MS | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomEnumVariable(
@ -1949,7 +2005,7 @@ RegisterCitusConfigVariables(void)
&MetadataSyncTransMode,
METADATA_SYNC_TRANSACTIONAL, metadata_sync_mode_options,
PGC_SUSET,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -1961,7 +2017,7 @@ RegisterCitusConfigVariables(void)
&MetadataSyncRetryInterval,
5 * MS_PER_SECOND, 1, 7 * MS_PER_DAY,
PGC_SIGHUP,
GUC_UNIT_MS | GUC_NO_SHOW_ALL,
GUC_UNIT_MS | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
/*
@ -1979,7 +2035,7 @@ RegisterCitusConfigVariables(void)
&MitmfifoEmptyString,
"",
PGC_SUSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomEnumVariable(
@ -2014,7 +2070,7 @@ RegisterCitusConfigVariables(void)
&NextCleanupRecordId,
0, 0, INT_MAX,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -2029,7 +2085,7 @@ RegisterCitusConfigVariables(void)
&NextOperationId,
0, 0, INT_MAX,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -2044,7 +2100,7 @@ RegisterCitusConfigVariables(void)
&NextPlacementId,
0, 0, INT_MAX,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -2059,7 +2115,7 @@ RegisterCitusConfigVariables(void)
&NextShardId,
0, 0, INT_MAX,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -2098,7 +2154,7 @@ RegisterCitusConfigVariables(void)
&OverrideTableVisibility,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -2113,7 +2169,7 @@ RegisterCitusConfigVariables(void)
&PreventIncompleteConnectionEstablishment,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -2124,7 +2180,7 @@ RegisterCitusConfigVariables(void)
&PropagateSessionSettingsForLoopbackConnection,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomEnumVariable(
@ -2138,6 +2194,23 @@ RegisterCitusConfigVariables(void)
GUC_STANDARD,
NULL, NULL, NULL);
DefineCustomIntVariable(
"citus.rebalancer_by_disk_size_base_cost",
gettext_noop(
"When using the by_disk_size rebalance strategy each shard group "
"will get this cost in bytes added to its actual disk size. This "
"is used to avoid creating a bad balance when there's very little "
"data in some of the shards. The assumption is that even empty "
"shards have some cost, because of parallelism and because empty "
"shard groups will likely grow in the future."),
gettext_noop(
"The main reason this is configurable is so that it can be lowered for Citus's regression tests."),
&RebalancerByDiskSizeBaseCost,
100 * 1024 * 1024, 0, INT_MAX,
PGC_USERSET,
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
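A hedged usage sketch; the value and the citus_rebalance_start() call below are illustrative.

SET citus.rebalancer_by_disk_size_base_cost TO '10MB';  -- shrink the per-shard-group padding (mainly useful in test setups)
SELECT citus_rebalance_start(rebalance_strategy := 'by_disk_size');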
DefineCustomIntVariable(
"citus.recover_2pc_interval",
gettext_noop("Sets the time to wait between recovering 2PCs."),
@ -2162,7 +2235,7 @@ RegisterCitusConfigVariables(void)
&RemoteCopyFlushThreshold,
8 * 1024 * 1024, 0, INT_MAX,
PGC_USERSET,
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL,
GUC_UNIT_BYTE | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -2186,7 +2259,7 @@ RegisterCitusConfigVariables(void)
&RepartitionJoinBucketCountPerNode,
4, 1, INT_MAX,
PGC_SIGHUP,
GUC_STANDARD | GUC_NO_SHOW_ALL,
GUC_STANDARD | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
/* deprecated setting */
@ -2197,7 +2270,7 @@ RegisterCitusConfigVariables(void)
&DeprecatedReplicateReferenceTablesOnActivate,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomEnumVariable(
@ -2210,7 +2283,7 @@ RegisterCitusConfigVariables(void)
REPLICATION_MODEL_STREAMING,
replication_model_options,
PGC_SUSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
WarnIfReplicationModelIsSet, NULL, NULL);
DefineCustomBoolVariable(
@ -2223,7 +2296,7 @@ RegisterCitusConfigVariables(void)
&RunningUnderIsolationTest,
false,
PGC_SUSET,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -2240,7 +2313,7 @@ RegisterCitusConfigVariables(void)
&SelectOpensTransactionBlock,
true,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -2297,7 +2370,7 @@ RegisterCitusConfigVariables(void)
&SkipAdvisoryLockPermissionChecks,
false,
GUC_SUPERUSER_ONLY,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomBoolVariable(
@ -2342,7 +2415,7 @@ RegisterCitusConfigVariables(void)
&SortReturning,
false,
PGC_SUSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
/*
@ -2358,7 +2431,7 @@ RegisterCitusConfigVariables(void)
&StatStatementsMax,
50000, 1000, 10000000,
PGC_POSTMASTER,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomIntVariable(
@ -2369,7 +2442,7 @@ RegisterCitusConfigVariables(void)
&StatStatementsPurgeInterval,
10, -1, INT_MAX,
PGC_SIGHUP,
GUC_UNIT_MS | GUC_NO_SHOW_ALL,
GUC_UNIT_MS | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
DefineCustomEnumVariable(
@ -2395,7 +2468,6 @@ RegisterCitusConfigVariables(void)
PGC_POSTMASTER,
GUC_STANDARD,
NULL, NULL, NULL);
DefineCustomEnumVariable(
"citus.stat_tenants_log_level",
gettext_noop("Sets the level of citus_stat_tenants log messages"),
@ -2430,6 +2502,16 @@ RegisterCitusConfigVariables(void)
GUC_STANDARD,
NULL, NULL, NULL);
DefineCustomRealVariable(
"citus.stat_tenants_untracked_sample_rate",
gettext_noop("Sampling rate for new tenants in citus_stat_tenants."),
NULL,
&StatTenantsSampleRateForNewTenants,
1, 0, 1,
PGC_USERSET,
GUC_STANDARD,
NULL, NULL, NULL);
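A short sketch of how the new sampling GUC might be used; the 0.1 rate is illustrative.

SET citus.stat_tenants_untracked_sample_rate TO 0.1;  -- sample roughly 10% of tenants not yet tracked
SELECT * FROM citus_stat_tenants;                      -- tenants already in the view keep being tracked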
DefineCustomBoolVariable(
"citus.subquery_pushdown",
gettext_noop("Usage of this GUC is highly discouraged, please read the long "
@ -2445,7 +2527,7 @@ RegisterCitusConfigVariables(void)
&SubqueryPushdown,
false,
PGC_USERSET,
GUC_NO_SHOW_ALL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NoticeIfSubqueryPushdownEnabled, NULL, NULL);
DefineCustomEnumVariable(
@ -2556,16 +2638,17 @@ RegisterCitusConfigVariables(void)
/* warn about config items in the citus namespace that are not registered above */
EmitWarningsOnPlaceholders("citus");
OverridePostgresConfigAssignHooks();
OverridePostgresConfigProperties();
}
/*
* OverridePostgresConfigAssignHooks overrides GUC assign hooks where we want
* custom behaviour.
* OverridePostgresConfigProperties overrides GUC properties where we want
* custom behaviour. We should consider using the Postgres function find_option
* in this function once it is exported by Postgres in a later release.
*/
static void
OverridePostgresConfigAssignHooks(void)
OverridePostgresConfigProperties(void)
{
struct config_generic **guc_vars = get_guc_variables();
int gucCount = GetNumConfigOptions();
@ -2581,6 +2664,17 @@ OverridePostgresConfigAssignHooks(void)
OldApplicationNameAssignHook = stringVar->assign_hook;
stringVar->assign_hook = ApplicationNameAssignHook;
}
/*
* Turn on GUC_REPORT for search_path. GUC_REPORT ensures that an S (Parameter Status)
* packet is appended after the C (Command Complete) packet the server sends
* for a SET command. The S packet contains the new value of the parameter
* if its value has changed.
*/
if (strcmp(var->name, "search_path") == 0)
{
var->flags |= GUC_REPORT;
}
}
}
@ -2845,12 +2939,14 @@ NodeConninfoGucAssignHook(const char *newval, void *extra)
newval = "";
}
if (strcmp(newval, NodeConninfo) == 0)
if (strcmp(newval, NodeConninfo) == 0 && checkAtBootPassed)
{
/* It did not change, no need to do anything */
return;
}
checkAtBootPassed = true;
PQconninfoOption *optionArray = PQconninfoParse(newval, NULL);
if (optionArray == NULL)
{

View File

@ -1,3 +1,51 @@
-- citus--11.3-1--12.0-1
-- bump version to 12.0-1
CREATE TABLE citus.pg_dist_schema (
schemaid oid NOT NULL,
colocationid int NOT NULL,
CONSTRAINT pg_dist_schema_pkey PRIMARY KEY (schemaid),
CONSTRAINT pg_dist_schema_unique_colocationid_index UNIQUE (colocationid)
);
ALTER TABLE citus.pg_dist_schema SET SCHEMA pg_catalog;
GRANT SELECT ON pg_catalog.pg_dist_schema TO public;
-- udfs used to modify pg_dist_schema on workers, to sync metadata
#include "udfs/citus_internal_add_tenant_schema/12.0-1.sql"
#include "udfs/citus_internal_delete_tenant_schema/12.0-1.sql"
#include "udfs/citus_prepare_pg_upgrade/12.0-1.sql"
#include "udfs/citus_finish_pg_upgrade/12.0-1.sql"
-- udfs used to modify pg_dist_schema globally via drop trigger
#include "udfs/citus_internal_unregister_tenant_schema_globally/12.0-1.sql"
#include "udfs/citus_drop_trigger/12.0-1.sql"
DROP VIEW citus_shards;
DROP FUNCTION citus_shard_sizes;
#include "udfs/citus_shard_sizes/12.0-1.sql"
#include "udfs/citus_tables/12.0-1.sql"
#include "udfs/citus_shards/12.0-1.sql"
#include "udfs/citus_schemas/12.0-1.sql"
-- udfs used to include schema-based tenants in tenant monitoring
#include "udfs/citus_stat_tenants_local/12.0-1.sql"
-- udfs to convert a regular/tenant schema to a tenant/regular schema
#include "udfs/citus_schema_distribute/12.0-1.sql"
#include "udfs/citus_schema_undistribute/12.0-1.sql"
#include "udfs/drop_old_time_partitions/12.0-1.sql"
#include "udfs/get_missing_time_partition_ranges/12.0-1.sql"
-- Update the default rebalance strategy to 'by_disk_size', but only if the
-- default is currently 'by_shard_count'
SELECT citus_set_default_rebalance_strategy(name)
FROM pg_dist_rebalance_strategy
WHERE name = 'by_disk_size'
AND (SELECT default_strategy FROM pg_dist_rebalance_strategy WHERE name = 'by_shard_count');
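After this upgrade step, the chosen default can be inspected or reverted with the existing UDF; a hedged example:

SELECT name, default_strategy FROM pg_dist_rebalance_strategy;
SELECT citus_set_default_rebalance_strategy('by_shard_count');  -- restore the pre-12.0 default if desired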

View File

@ -1,2 +1,83 @@
-- citus--12.0-1--11.3-1
-- this is an empty downgrade path since citus--11.3-1--12.0-1.sql is empty for now
DO $$
BEGIN
-- Throw an error if the user has created any tenant schemas.
IF EXISTS (SELECT 1 FROM pg_catalog.pg_dist_schema)
THEN
RAISE EXCEPTION 'cannot downgrade Citus because there are '
'tenant schemas created.'
USING HINT = 'To downgrade Citus to an older version, you should '
'first issue SELECT citus.schema_tenant_unset("%s") '
'for each tenant schema.';
END IF;
-- Throw an error if the user has any distributed tables without a shard key.
IF EXISTS (
SELECT 1 FROM pg_dist_partition
WHERE repmodel != 't' AND partmethod = 'n' AND colocationid != 0)
THEN
RAISE EXCEPTION 'cannot downgrade Citus because there are '
'distributed tables without a shard key.'
USING HINT = 'You can find the distributed tables without a shard '
'key in the cluster by using the following query: '
'"SELECT * FROM citus_tables WHERE distribution_column '
'= ''<none>'' AND colocation_id > 0".',
DETAIL = 'To downgrade Citus to an older version, you should '
'first convert those tables to Postgres tables by '
'executing SELECT undistribute_table("%s").';
END IF;
END;
$$ LANGUAGE plpgsql;
DROP FUNCTION pg_catalog.citus_schema_distribute(regnamespace);
DROP FUNCTION pg_catalog.citus_schema_undistribute(regnamespace);
DROP FUNCTION pg_catalog.citus_internal_add_tenant_schema(Oid, int);
#include "../udfs/citus_prepare_pg_upgrade/11.2-1.sql"
#include "../udfs/citus_finish_pg_upgrade/11.2-1.sql"
DROP FUNCTION pg_catalog.citus_internal_delete_tenant_schema(Oid);
DROP FUNCTION pg_catalog.citus_internal_unregister_tenant_schema_globally(Oid, text);
#include "../udfs/citus_drop_trigger/10.2-1.sql"
-- citus_schemas might be created in either of the schemas
DROP VIEW IF EXISTS public.citus_schemas;
DROP VIEW IF EXISTS pg_catalog.citus_schemas;
DROP VIEW pg_catalog.citus_shards;
DROP FUNCTION pg_catalog.citus_shard_sizes;
#include "../udfs/citus_shard_sizes/10.0-1.sql"
-- citus_shards/11.1-1.sql tries to create citus_shards in pg_catalog but it is not allowed.
-- Here we use citus_shards/10.0-1.sql to properly create the view in citus schema and
-- then alter it to pg_catalog, so citus_shards/11.1-1.sql can REPLACE it without any errors.
#include "../udfs/citus_shards/10.0-1.sql"
#include "../udfs/citus_tables/11.1-1.sql"
#include "../udfs/citus_shards/11.1-1.sql"
DROP TABLE pg_catalog.pg_dist_schema;
DROP VIEW pg_catalog.citus_stat_tenants_local;
DROP FUNCTION pg_catalog.citus_stat_tenants_local_internal(
BOOLEAN,
OUT INT,
OUT TEXT,
OUT INT,
OUT INT,
OUT INT,
OUT INT,
OUT DOUBLE PRECISION,
OUT DOUBLE PRECISION,
OUT BIGINT);
#include "../udfs/citus_stat_tenants_local/11.3-1.sql"
#include "../udfs/drop_old_time_partitions/10.2-1.sql"
#include "../udfs/get_missing_time_partition_ranges/10.2-1.sql"
-- This explicitly does not reset the rebalance strategy to by_shard_count,
-- because there's no way of knowing if the rebalance strategy before the
-- upgrade was by_disk_size or by_shard_count. And even in previous versions,
-- by_disk_size has been considered superior for quite some time.

Some files were not shown because too many files have changed in this diff.