Compare commits
56 Commits
Author | SHA1 | Date |
---|---|---|
|
d01527e77b | |
|
402ae1f240 | |
|
6cf47c0a38 | |
|
43a540cff1 | |
|
3493394bc7 | |
|
e00def3dbc | |
|
d8f8b1cfff | |
|
5e6eb2cd97 | |
|
96b767c96b | |
|
29403f60bc | |
|
c9e01e816d | |
|
854eec7380 | |
|
dd53a1ad1f | |
|
4e7d5060f5 | |
|
c0f35e782f | |
|
aa20cc53ba | |
|
df1b410b02 | |
|
b74de6f7ba | |
|
3bcde91bcb | |
|
5008a8f61a | |
|
38cf1f291a | |
|
3bd991d215 | |
|
6460fc45e4 | |
|
280bc704d0 | |
|
fb0bb40225 | |
|
9f4b6a6cb9 | |
|
3c5ea1b1f2 | |
|
0eb5c144ed | |
|
b3947510b9 | |
|
e64e627e31 | |
|
c456a933f0 | |
|
86f1e181c4 | |
|
84410da2ba | |
|
106d68fd61 | |
|
f571abcca6 | |
|
6fee3068e3 | |
|
6f400dab58 | |
|
90da684f56 | |
|
6986ac2f17 | |
|
998b044fdc | |
|
1507f32282 | |
|
20e500f96b | |
|
60424534ef | |
|
fefaed37e7 | |
|
5adc151e7c | |
|
6192dc2bff | |
|
3f6e903722 | |
|
9efd8e05d6 | |
|
210bcdcc08 | |
|
d3417a5e34 | |
|
e2330e8f87 | |
|
690dab316a | |
|
be6e372b27 | |
|
3d6bc315ab | |
|
4a904e070d | |
|
f8e06fb1ed |
|
@ -0,0 +1,620 @@
|
|||
version: 2.1
|
||||
orbs:
|
||||
codecov: codecov/codecov@1.1.1
|
||||
azure-cli: circleci/azure-cli@1.0.0
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
description: Build the citus extension
|
||||
parameters:
|
||||
pg_major:
|
||||
description: postgres major version building citus for
|
||||
type: integer
|
||||
image:
|
||||
description: docker image to use for the build
|
||||
type: string
|
||||
default: citus/extbuilder
|
||||
image_tag:
|
||||
description: tag to use for the docker image
|
||||
type: string
|
||||
docker:
|
||||
- image: '<< parameters.image >>:<< parameters.image_tag >>'
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: 'Configure, Build, and Install'
|
||||
command: |
|
||||
./ci/build-citus.sh
|
||||
- persist_to_workspace:
|
||||
root: .
|
||||
paths:
|
||||
- build-<< parameters.pg_major >>/*
|
||||
- install-<<parameters.pg_major >>.tar
|
||||
|
||||
check-style:
|
||||
docker:
|
||||
- image: 'citus/stylechecker:latest'
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: 'Check Style'
|
||||
command: citus_indent --check
|
||||
- run:
|
||||
name: 'Fix whitespace'
|
||||
command: ci/editorconfig.sh
|
||||
- run:
|
||||
name: 'Check if whitespace fixing changed anything, install editorconfig if it did'
|
||||
command: git diff --exit-code
|
||||
- run:
|
||||
name: 'Remove useless declarations'
|
||||
command: ci/remove_useless_declarations.sh
|
||||
- run:
|
||||
name: 'Check if changed'
|
||||
command: git diff --cached --exit-code
|
||||
- run:
|
||||
name: 'Normalize test output'
|
||||
command: ci/normalize_expected.sh
|
||||
- run:
|
||||
name: 'Check if changed'
|
||||
command: git diff --exit-code
|
||||
- run:
|
||||
name: 'Check for C-style comments in migration files'
|
||||
command: ci/disallow_c_comments_in_migrations.sh
|
||||
- run:
|
||||
name: 'Check if changed'
|
||||
command: git diff --exit-code
|
||||
- run:
|
||||
name: 'Check for gitignore entries for source files'
|
||||
command: ci/fix_gitignore.sh
|
||||
- run:
|
||||
name: 'Check if changed'
|
||||
command: git diff --exit-code
|
||||
- run:
|
||||
name: 'Check for lengths of changelog entries'
|
||||
command: ci/disallow_long_changelog_entries.sh
|
||||
- run:
|
||||
name: 'Check for banned C API usage'
|
||||
command: ci/banned.h.sh
|
||||
- run:
|
||||
name: 'Check for tests missing in schedules'
|
||||
command: ci/check_all_tests_are_run.sh
|
||||
- run:
|
||||
name: 'Check if all CI scripts are actually run'
|
||||
command: ci/check_all_ci_scripts_are_run.sh
|
||||
|
||||
check-sql-snapshots:
|
||||
docker:
|
||||
- image: 'citus/extbuilder:latest'
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: 'Check Snapshots'
|
||||
command: ci/check_sql_snapshots.sh
|
||||
|
||||
test-pg-upgrade:
|
||||
description: Runs postgres upgrade tests
|
||||
parameters:
|
||||
old_pg_major:
|
||||
description: 'postgres major version to use before the upgrade'
|
||||
type: integer
|
||||
new_pg_major:
|
||||
description: 'postgres major version to upgrade to'
|
||||
type: integer
|
||||
image:
|
||||
description: 'docker image to use as for the tests'
|
||||
type: string
|
||||
default: citus/pgupgradetester
|
||||
image_tag:
|
||||
description: 'docker image tag to use'
|
||||
type: string
|
||||
default: 12-13
|
||||
docker:
|
||||
- image: '<< parameters.image >>:<< parameters.image_tag >>'
|
||||
working_directory: /home/circleci/project
|
||||
steps:
|
||||
- checkout
|
||||
- attach_workspace:
|
||||
at: .
|
||||
- run:
|
||||
name: 'Install Extension'
|
||||
command: |
|
||||
tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-<< parameters.old_pg_major >>.tar" --directory /
|
||||
tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-<< parameters.new_pg_major >>.tar" --directory /
|
||||
- run:
|
||||
name: 'Configure'
|
||||
command: |
|
||||
chown -R circleci .
|
||||
gosu circleci ./configure
|
||||
- run:
|
||||
name: 'Enable core dumps'
|
||||
command: |
|
||||
ulimit -c unlimited
|
||||
- run:
|
||||
name: 'Install and test postgres upgrade'
|
||||
command: |
|
||||
gosu circleci \
|
||||
make -C src/test/regress \
|
||||
check-pg-upgrade \
|
||||
old-bindir=/usr/lib/postgresql/<< parameters.old_pg_major >>/bin \
|
||||
new-bindir=/usr/lib/postgresql/<< parameters.new_pg_major >>/bin
|
||||
no_output_timeout: 2m
|
||||
- run:
|
||||
name: 'Regressions'
|
||||
command: |
|
||||
if [ -f "src/test/regress/regression.diffs" ]; then
|
||||
cat src/test/regress/regression.diffs
|
||||
exit 1
|
||||
fi
|
||||
when: on_fail
|
||||
- run:
|
||||
name: 'Copy coredumps'
|
||||
command: |
|
||||
mkdir -p /tmp/core_dumps
|
||||
if ls core.* 1> /dev/null 2>&1; then
|
||||
cp core.* /tmp/core_dumps
|
||||
fi
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save regressions'
|
||||
path: src/test/regress/regression.diffs
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save core dumps'
|
||||
path: /tmp/core_dumps
|
||||
when: on_fail
|
||||
- codecov/upload:
|
||||
flags: 'test_<< parameters.old_pg_major >>_<< parameters.new_pg_major >>,upgrade'
|
||||
|
||||
test-citus-upgrade:
|
||||
description: Runs citus upgrade tests
|
||||
parameters:
|
||||
pg_major:
|
||||
description: "postgres major version"
|
||||
type: integer
|
||||
image:
|
||||
description: 'docker image to use as for the tests'
|
||||
type: string
|
||||
default: citus/citusupgradetester
|
||||
image_tag:
|
||||
description: 'docker image tag to use'
|
||||
type: string
|
||||
docker:
|
||||
- image: '<< parameters.image >>:<< parameters.image_tag >>'
|
||||
working_directory: /home/circleci/project
|
||||
steps:
|
||||
- checkout
|
||||
- attach_workspace:
|
||||
at: .
|
||||
- run:
|
||||
name: 'Configure'
|
||||
command: |
|
||||
chown -R circleci .
|
||||
gosu circleci ./configure
|
||||
- run:
|
||||
name: 'Enable core dumps'
|
||||
command: |
|
||||
ulimit -c unlimited
|
||||
- run:
|
||||
name: 'Install and test citus upgrade'
|
||||
command: |
|
||||
# run make check-citus-upgrade for all citus versions
|
||||
# the image has ${CITUS_VERSIONS} set with all versions it contains the binaries of
|
||||
for citus_version in ${CITUS_VERSIONS}; do \
|
||||
export upgrade_test_old_citus_version="$citus_version"; \
|
||||
gosu circleci \
|
||||
make -C src/test/regress \
|
||||
check-citus-upgrade \
|
||||
bindir=/usr/lib/postgresql/${PG_MAJOR}/bin \
|
||||
citus-pre-tar=/install-pg${PG_MAJOR}-citus${citus_version}.tar \
|
||||
citus-post-tar=/home/circleci/project/install-$PG_MAJOR.tar; \
|
||||
done;
|
||||
|
||||
# run make check-citus-upgrade-mixed for all citus versions
|
||||
# the image has ${CITUS_VERSIONS} set with all versions it contains the binaries of
|
||||
for citus_version in ${CITUS_VERSIONS}; do \
|
||||
gosu circleci \
|
||||
make -C src/test/regress \
|
||||
check-citus-upgrade-mixed \
|
||||
bindir=/usr/lib/postgresql/${PG_MAJOR}/bin \
|
||||
citus-pre-tar=/install-pg${PG_MAJOR}-citus${citus_version}.tar \
|
||||
citus-post-tar=/home/circleci/project/install-$PG_MAJOR.tar; \
|
||||
done;
|
||||
no_output_timeout: 2m
|
||||
- run:
|
||||
name: 'Regressions'
|
||||
command: |
|
||||
if [ -f "src/test/regress/regression.diffs" ]; then
|
||||
cat src/test/regress/regression.diffs
|
||||
exit 1
|
||||
fi
|
||||
when: on_fail
|
||||
- run:
|
||||
name: 'Copy coredumps'
|
||||
command: |
|
||||
mkdir -p /tmp/core_dumps
|
||||
if ls core.* 1> /dev/null 2>&1; then
|
||||
cp core.* /tmp/core_dumps
|
||||
fi
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save regressions'
|
||||
path: src/test/regress/regression.diffs
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save core dumps'
|
||||
path: /tmp/core_dumps
|
||||
when: on_fail
|
||||
- codecov/upload:
|
||||
flags: 'test_<< parameters.pg_major >>,upgrade'
|
||||
|
||||
test-citus:
|
||||
description: Runs the common tests of citus
|
||||
parameters:
|
||||
pg_major:
|
||||
description: "postgres major version"
|
||||
type: integer
|
||||
image:
|
||||
description: 'docker image to use as for the tests'
|
||||
type: string
|
||||
default: citus/exttester
|
||||
image_tag:
|
||||
description: 'docker image tag to use'
|
||||
type: string
|
||||
make:
|
||||
description: "make target"
|
||||
type: string
|
||||
docker:
|
||||
- image: '<< parameters.image >>:<< parameters.image_tag >>'
|
||||
working_directory: /home/circleci/project
|
||||
steps:
|
||||
- checkout
|
||||
- attach_workspace:
|
||||
at: .
|
||||
- run:
|
||||
name: 'Install Extension'
|
||||
command: |
|
||||
tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory /
|
||||
- run:
|
||||
name: 'Configure'
|
||||
command: |
|
||||
chown -R circleci .
|
||||
gosu circleci ./configure
|
||||
- run:
|
||||
name: 'Enable core dumps'
|
||||
command: |
|
||||
ulimit -c unlimited
|
||||
- run:
|
||||
name: 'Run Test'
|
||||
command: |
|
||||
gosu circleci make -C src/test/regress << parameters.make >>
|
||||
no_output_timeout: 2m
|
||||
- run:
|
||||
name: 'Regressions'
|
||||
command: |
|
||||
if [ -f "src/test/regress/regression.diffs" ]; then
|
||||
cat src/test/regress/regression.diffs
|
||||
exit 1
|
||||
fi
|
||||
when: on_fail
|
||||
- run:
|
||||
name: 'Copy coredumps'
|
||||
command: |
|
||||
mkdir -p /tmp/core_dumps
|
||||
if ls core.* 1> /dev/null 2>&1; then
|
||||
cp core.* /tmp/core_dumps
|
||||
fi
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save regressions'
|
||||
path: src/test/regress/regression.diffs
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save core dumps'
|
||||
path: /tmp/core_dumps
|
||||
when: on_fail
|
||||
- codecov/upload:
|
||||
flags: 'test_<< parameters.pg_major >>,<< parameters.make >>'
|
||||
when: always
|
||||
|
||||
tap-test-citus:
|
||||
description: Runs tap tests for citus
|
||||
parameters:
|
||||
pg_major:
|
||||
description: "postgres major version"
|
||||
type: integer
|
||||
image:
|
||||
description: 'docker image to use as for the tests'
|
||||
type: string
|
||||
default: citus/exttester
|
||||
image_tag:
|
||||
description: 'docker image tag to use'
|
||||
type: string
|
||||
suite:
|
||||
description: 'name of the tap test suite to run'
|
||||
type: string
|
||||
make:
|
||||
description: "make target"
|
||||
type: string
|
||||
default: installcheck
|
||||
docker:
|
||||
- image: '<< parameters.image >>:<< parameters.image_tag >>'
|
||||
working_directory: /home/circleci/project
|
||||
steps:
|
||||
- checkout
|
||||
- attach_workspace:
|
||||
at: .
|
||||
- run:
|
||||
name: 'Install Extension'
|
||||
command: |
|
||||
tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory /
|
||||
- run:
|
||||
name: 'Configure'
|
||||
command: |
|
||||
chown -R circleci .
|
||||
gosu circleci ./configure
|
||||
- run:
|
||||
name: 'Enable core dumps'
|
||||
command: |
|
||||
ulimit -c unlimited
|
||||
- run:
|
||||
name: 'Run Test'
|
||||
command: |
|
||||
gosu circleci make -C src/test/<< parameters.suite >> << parameters.make >>
|
||||
no_output_timeout: 2m
|
||||
- run:
|
||||
name: 'Copy coredumps'
|
||||
command: |
|
||||
mkdir -p /tmp/core_dumps
|
||||
if ls core.* 1> /dev/null 2>&1; then
|
||||
cp core.* /tmp/core_dumps
|
||||
fi
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save tap logs'
|
||||
path: /home/circleci/project/src/test/<< parameters.suite >>/tmp_check/log
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save core dumps'
|
||||
path: /tmp/core_dumps
|
||||
when: on_fail
|
||||
- codecov/upload:
|
||||
flags: 'test_<< parameters.pg_major >>,tap_<< parameters.suite >>_<< parameters.make >>'
|
||||
when: always
|
||||
|
||||
check-merge-to-enterprise:
|
||||
docker:
|
||||
- image: citus/extbuilder:13.2
|
||||
working_directory: /home/circleci/project
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
command: |
|
||||
ci/check_enterprise_merge.sh
|
||||
|
||||
ch_benchmark:
|
||||
docker:
|
||||
- image: buildpack-deps:stretch
|
||||
working_directory: /home/circleci/project
|
||||
steps:
|
||||
- checkout
|
||||
- azure-cli/install
|
||||
- azure-cli/login-with-service-principal
|
||||
- run:
|
||||
command: |
|
||||
cd ./src/test/hammerdb
|
||||
sh run_hammerdb.sh citusbot_ch_benchmark_rg
|
||||
name: install dependencies and run ch_benchmark tests
|
||||
no_output_timeout: 20m
|
||||
|
||||
# Runs the HammerDB driver script against the TPC-C benchmark target.
# Only scheduled for branches matching the tpcc_benchmark/ prefix (see the
# workflow filters further down in this config).
tpcc_benchmark:
  docker:
    - image: buildpack-deps:stretch
  working_directory: /home/circleci/project
  steps:
    - checkout
    # the azure-cli orb steps install the CLI and log in before the script runs
    - azure-cli/install
    - azure-cli/login-with-service-principal
    - run:
        # the argument is presumably the Azure resource group name - TODO confirm
        command: |
          cd ./src/test/hammerdb
          sh run_hammerdb.sh citusbot_tpcc_benchmark_rg
        # label previously said "ch_benchmark" (copy-paste from the job above)
        name: install dependencies and run tpcc_benchmark tests
        no_output_timeout: 20m
|
||||
|
||||
workflows:
|
||||
version: 2
|
||||
build_and_test:
|
||||
jobs:
|
||||
|
||||
- check-merge-to-enterprise:
|
||||
filters:
|
||||
branches:
|
||||
ignore:
|
||||
- /release-[0-9]+\.[0-9]+.*/ # match with release-X.Y.*
|
||||
|
||||
- build:
|
||||
name: build-12
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
- build:
|
||||
name: build-13
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
|
||||
- check-style
|
||||
- check-sql-snapshots
|
||||
|
||||
- test-citus:
|
||||
name: 'test-12_check-multi'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-multi
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-mx'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-multi-mx
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-vanilla'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-vanilla
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-isolation'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-isolation
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-worker'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-worker
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-operations'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-operations
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-follower-cluster'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-follower-cluster
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-columnar'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-columnar
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-columnar-isolation'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
make: check-columnar-isolation
|
||||
requires: [build-12]
|
||||
- tap-test-citus:
|
||||
name: 'test_12_tap-recovery'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
suite: recovery
|
||||
requires: [build-12]
|
||||
- tap-test-citus:
|
||||
name: 'test-12_tap-columnar-freezing'
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
suite: columnar_freezing
|
||||
requires: [build-12]
|
||||
- test-citus:
|
||||
name: 'test-12_check-failure'
|
||||
pg_major: 12
|
||||
image: citus/failtester
|
||||
image_tag: '12.6'
|
||||
make: check-failure
|
||||
requires: [build-12]
|
||||
|
||||
- test-citus:
|
||||
name: 'test-13_check-multi'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-multi
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-mx'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-multi-mx
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-vanilla'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-vanilla
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-isolation'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-isolation
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-worker'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-worker
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-operations'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-operations
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-follower-cluster'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-follower-cluster
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-columnar'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-columnar
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-columnar-isolation'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
make: check-columnar-isolation
|
||||
requires: [build-13]
|
||||
- tap-test-citus:
|
||||
name: 'test_13_tap-recovery'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
suite: recovery
|
||||
requires: [build-13]
|
||||
- tap-test-citus:
|
||||
name: 'test-13_tap-columnar-freezing'
|
||||
pg_major: 13
|
||||
image_tag: '13.2'
|
||||
suite: columnar_freezing
|
||||
requires: [build-13]
|
||||
- test-citus:
|
||||
name: 'test-13_check-failure'
|
||||
pg_major: 13
|
||||
image: citus/failtester
|
||||
image_tag: '13.2'
|
||||
make: check-failure
|
||||
requires: [build-13]
|
||||
|
||||
- test-pg-upgrade:
|
||||
name: 'test-12-13_check-pg-upgrade'
|
||||
old_pg_major: 12
|
||||
new_pg_major: 13
|
||||
image_tag: 12-13
|
||||
requires: [build-12,build-13]
|
||||
|
||||
- test-citus-upgrade:
|
||||
name: test-12_check-citus-upgrade
|
||||
pg_major: 12
|
||||
image_tag: '12.6'
|
||||
requires: [build-12]
|
||||
|
||||
- ch_benchmark:
|
||||
requires: [build-13]
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- /ch_benchmark\/.*/ # match with ch_benchmark/ prefix
|
||||
- tpcc_benchmark:
|
||||
requires: [build-13]
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- /tpcc_benchmark\/.*/ # match with tpcc_benchmark/ prefix
|
|
@ -1,7 +0,0 @@
|
|||
exclude_patterns:
|
||||
- "src/backend/distributed/utils/citus_outfuncs.c"
|
||||
- "src/backend/distributed/deparser/ruleutils_*.c"
|
||||
- "src/include/distributed/citus_nodes.h"
|
||||
- "src/backend/distributed/safeclib"
|
||||
- "src/backend/columnar/safeclib"
|
||||
- "**/vendor/"
|
|
@ -1,33 +0,0 @@
|
|||
# gdbpg.py contains scripts to nicely print the postgres datastructures
|
||||
# while in a gdb session. Since the vscode debugger is based on gdb this
|
||||
# actually also works when debugging with vscode. Providing nice tools
|
||||
# to understand the internal datastructures we are working with.
|
||||
source /root/gdbpg.py
|
||||
|
||||
# when debugging postgres it is convenient to _always_ have a breakpoint
|
||||
# trigger when an error is logged. Because .gdbinit is sourced before gdb
|
||||
# is fully attached and has the sources loaded. To make sure the breakpoint
|
||||
# is added when the library is loaded we temporarily set the breakpoint pending
|
||||
# to on. After we have added our breakpoint we revert back to the default
|
||||
# configuration for breakpoint pending.
|
||||
# The breakpoint is hard to read, but at entry of the function we don't have
|
||||
# the level loaded in elevel. Instead we hardcode the location where the
|
||||
# level of the current error is stored. Also gdb doesn't understand the
|
||||
# ERROR symbol so we hardcode this to the value of ERROR. It is very unlikely
|
||||
# this value will ever change in postgres, but if it does we might need to
|
||||
# find a way to conditionally load the correct breakpoint.
|
||||
set breakpoint pending on
|
||||
break elog.c:errfinish if errordata[errordata_stack_depth].elevel == 21
|
||||
set breakpoint pending auto
|
||||
|
||||
echo \n
|
||||
echo ----------------------------------------------------------------------------------\n
|
||||
echo when attaching to a postgres backend a breakpoint will be set on elog.c:errfinish \n
|
||||
echo it will only break on errors being raised in postgres \n
|
||||
echo \n
|
||||
echo to disable this breakpoint from vscode run `-exec disable 1` in the debug console \n
|
||||
echo this assumes it's the first breakpoint loaded as it is loaded from .gdbinit \n
|
||||
echo this can be verified with `-exec info break`, enabling can be done with \n
|
||||
echo `-exec enable 1` \n
|
||||
echo ----------------------------------------------------------------------------------\n
|
||||
echo \n
|
|
@ -1 +0,0 @@
|
|||
postgresql-*.tar.bz2
|
|
@ -1,7 +0,0 @@
|
|||
\timing on
|
||||
\pset linestyle unicode
|
||||
\pset border 2
|
||||
\setenv PAGER 'pspg --no-mouse -bX --no-commandbar --no-topbar'
|
||||
\set HISTSIZE 100000
|
||||
\set PROMPT1 '\n%[%033[1m%]%M %n@%/:%> (PID: %p)%R%[%033[0m%]%# '
|
||||
\set PROMPT2 ' '
|
|
@ -1,12 +0,0 @@
|
|||
[[source]]
|
||||
url = "https://pypi.org/simple"
|
||||
verify_ssl = true
|
||||
name = "pypi"
|
||||
|
||||
[packages]
|
||||
docopt = "*"
|
||||
|
||||
[dev-packages]
|
||||
|
||||
[requires]
|
||||
python_version = "3.9"
|
|
@ -1,28 +0,0 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "6956a6700ead5804aa56bd597c93bb4a13f208d2d49d3b5399365fd240ca0797"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
"python_version": "3.9"
|
||||
},
|
||||
"sources": [
|
||||
{
|
||||
"name": "pypi",
|
||||
"url": "https://pypi.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"default": {
|
||||
"docopt": {
|
||||
"hashes": [
|
||||
"sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.6.2"
|
||||
}
|
||||
},
|
||||
"develop": {}
|
||||
}
|
|
@ -1,84 +0,0 @@
|
|||
#! /usr/bin/env pipenv-shebang
|
||||
"""Generate C/C++ properties file for VSCode.
|
||||
|
||||
Uses pgenv to iterate postgres versions and generate
|
||||
a C/C++ properties file for VSCode containing the
|
||||
include paths for the postgres headers.
|
||||
|
||||
Usage:
|
||||
generate_c_cpp_properties-json.py <target_path>
|
||||
generate_c_cpp_properties-json.py (-h | --help)
|
||||
generate_c_cpp_properties-json.py --version
|
||||
|
||||
Options:
|
||||
-h --help Show this screen.
|
||||
--version Show version.
|
||||
|
||||
"""
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
from docopt import docopt
|
||||
|
||||
|
||||
def main(args):
    """Write a VSCode C/C++ properties file covering every pgenv postgres version.

    Runs ``pgenv versions``, builds one configuration per listed version with
    ``generate_configuration`` and dumps the result as JSON to the target path.

    Args:
        args: docopt-style mapping; only ``args['<target_path>']`` (the file
            to write) is read.

    Raises:
        subprocess.CalledProcessError: if ``pgenv versions`` exits non-zero.
        OSError: if ``pgenv`` cannot be started or the target file cannot be
            opened for writing.
    """
    target_path = args['<target_path>']

    output = subprocess.check_output(['pgenv', 'versions'])
    # typical output is:
    #       14.8      pgsql-14.8
    #     * 15.3      pgsql-15.3
    #       16beta2   pgsql-16beta2
    # where the line marked with a * is the currently active version
    #
    # we are only interested in the first word of each line, which is the
    # version number, thus we strip the whitespace and the * from the line,
    # split it into words and take the first word
    split_lines = (
        line.strip('* ').split()
        for line in output.decode('utf-8').splitlines()
    )
    # blank (or whitespace/asterisk only) lines split into an empty list;
    # skip them instead of failing with an IndexError on words[0]
    versions = [words[0] for words in split_lines if words]

    # create the list of configurations per version
    configurations = [generate_configuration(version) for version in versions]

    # create the json file
    c_cpp_properties = {
        "configurations": configurations,
        "version": 4
    }

    # write the c_cpp_properties.json file
    with open(target_path, 'w') as f:
        json.dump(c_cpp_properties, f, indent=4)
|
||||
|
||||
|
||||
def generate_configuration(version):
    """Build the VSCode C/C++ configuration entry for one postgres version.

    Args:
        version: postgres version as listed by pgenv, e.g. ``'14.8'``. It is
            interpolated into the configuration name and into the path of the
            postgres sources under ``/home/citus/.pgenv/src``.

    Returns:
        A dict with the keys ``name``, ``includePath`` and
        ``configurationProvider`` (in that order). For ``'14.8'`` the name is
        ``"Citus Development Configuration - Postgres 14.8"`` and the include
        path contains ``"/home/citus/.pgenv/src/postgresql-14.8/src/**"``.
    """
    display_name = "Citus Development Configuration - Postgres {}".format(version)
    postgres_sources = "/home/citus/.pgenv/src/postgresql-{}/src/**".format(version)

    # the ${workspaceFolder} entries are written out literally for VSCode
    include_paths = [
        "/usr/local/include",
        postgres_sources,
        "${workspaceFolder}/**",
        "${workspaceFolder}/src/include/",
    ]

    configuration = {}
    configuration["name"] = display_name
    configuration["includePath"] = include_paths
    configuration["configurationProvider"] = "ms-vscode.makefile-tools"
    return configuration
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
arguments = docopt(__doc__, version='0.1.0')
|
||||
main(arguments)
|
|
@ -1,40 +0,0 @@
|
|||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Attach Citus (devcontainer)",
|
||||
"type": "cppdbg",
|
||||
"request": "attach",
|
||||
"processId": "${command:pickProcess}",
|
||||
"program": "/home/citus/.pgenv/pgsql/bin/postgres",
|
||||
"additionalSOLibSearchPath": "/home/citus/.pgenv/pgsql/lib",
|
||||
"setupCommands": [
|
||||
{
|
||||
"text": "handle SIGUSR1 noprint nostop pass",
|
||||
"description": "let gdb not stop when SIGUSR1 is sent to process",
|
||||
"ignoreFailures": true
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "Open core file",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "/home/citus/.pgenv/pgsql/bin/postgres",
|
||||
"coreDumpPath": "${input:corefile}",
|
||||
"cwd": "${workspaceFolder}",
|
||||
"MIMode": "gdb",
|
||||
}
|
||||
],
|
||||
"inputs": [
|
||||
{
|
||||
"id": "corefile",
|
||||
"type": "command",
|
||||
"command": "extension.commandvariable.file.pickFile",
|
||||
"args": {
|
||||
"dialogTitle": "Select core file",
|
||||
"include": "**/core*",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
|
@ -1,222 +0,0 @@
|
|||
FROM ubuntu:22.04 AS base
|
||||
|
||||
# environment is to make python pass an interactive shell, probably not the best timezone given a wide variety of colleagues
|
||||
ENV TZ=UTC
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
# install build tools
|
||||
RUN apt update && apt install -y \
|
||||
bison \
|
||||
bzip2 \
|
||||
cpanminus \
|
||||
curl \
|
||||
docbook-xml \
|
||||
docbook-xsl \
|
||||
flex \
|
||||
gcc \
|
||||
git \
|
||||
libcurl4-gnutls-dev \
|
||||
libicu-dev \
|
||||
libkrb5-dev \
|
||||
liblz4-dev \
|
||||
libpam0g-dev \
|
||||
libreadline-dev \
|
||||
libselinux1-dev \
|
||||
libssl-dev \
|
||||
libxml2-utils \
|
||||
libxslt-dev \
|
||||
libzstd-dev \
|
||||
locales \
|
||||
make \
|
||||
perl \
|
||||
pkg-config \
|
||||
python3 \
|
||||
python3-pip \
|
||||
software-properties-common \
|
||||
sudo \
|
||||
uuid-dev \
|
||||
valgrind \
|
||||
xsltproc \
|
||||
zlib1g-dev \
|
||||
&& add-apt-repository ppa:deadsnakes/ppa -y \
|
||||
&& apt install -y \
|
||||
python3.9-full \
|
||||
# software properties pulls in pkexec, which makes the debugger unusable in vscode
|
||||
&& apt purge -y \
|
||||
software-properties-common \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean
|
||||
|
||||
RUN sudo pip3 install pipenv pipenv-shebang
|
||||
|
||||
RUN cpanm install IPC::Run
|
||||
|
||||
RUN locale-gen en_US.UTF-8
|
||||
|
||||
# add the citus user to sudoers and allow all sudoers to login without a password prompt
|
||||
RUN useradd -ms /bin/bash citus \
|
||||
&& usermod -aG sudo citus \
|
||||
&& echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
|
||||
|
||||
WORKDIR /home/citus
|
||||
USER citus
|
||||
|
||||
# run all make commands with the number of cores available
|
||||
RUN echo "export MAKEFLAGS=\"-j \$(nproc)\"" >> "/home/citus/.bashrc"
|
||||
|
||||
RUN git clone --branch v1.3.2 --depth 1 https://github.com/theory/pgenv.git .pgenv
|
||||
COPY --chown=citus:citus pgenv/config/ .pgenv/config/
|
||||
ENV PATH="/home/citus/.pgenv/bin:${PATH}"
|
||||
ENV PATH="/home/citus/.pgenv/pgsql/bin:${PATH}"
|
||||
|
||||
USER citus
|
||||
|
||||
# build postgres versions separately for effective parallelism and caching of already built versions when changing only certain versions
|
||||
FROM base AS pg15
|
||||
RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.13
|
||||
RUN rm .pgenv/src/*.tar*
|
||||
RUN make -C .pgenv/src/postgresql-*/ clean
|
||||
RUN make -C .pgenv/src/postgresql-*/src/include install
|
||||
|
||||
# create a staging directory with all files we want to copy from our pgenv build
|
||||
# we will copy the contents of the staged folder into the final image at once
|
||||
RUN mkdir .pgenv-staging/
|
||||
RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/
|
||||
RUN rm .pgenv-staging/config/default.conf
|
||||
|
||||
FROM base AS pg16
|
||||
RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.9
|
||||
RUN rm .pgenv/src/*.tar*
|
||||
RUN make -C .pgenv/src/postgresql-*/ clean
|
||||
RUN make -C .pgenv/src/postgresql-*/src/include install
|
||||
|
||||
# create a staging directory with all files we want to copy from our pgenv build
|
||||
# we will copy the contents of the staged folder into the final image at once
|
||||
RUN mkdir .pgenv-staging/
|
||||
RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/
|
||||
RUN rm .pgenv-staging/config/default.conf
|
||||
|
||||
FROM base AS pg17
|
||||
RUN MAKEFLAGS="-j $(nproc)" pgenv build 17.5
|
||||
RUN rm .pgenv/src/*.tar*
|
||||
RUN make -C .pgenv/src/postgresql-*/ clean
|
||||
RUN make -C .pgenv/src/postgresql-*/src/include install
|
||||
|
||||
# create a staging directory with all files we want to copy from our pgenv build
|
||||
# we will copy the contents of the staged folder into the final image at once
|
||||
RUN mkdir .pgenv-staging/
|
||||
RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/
|
||||
RUN rm .pgenv-staging/config/default.conf
|
||||
|
||||
FROM base AS uncrustify-builder
|
||||
|
||||
RUN sudo apt update && sudo apt install -y cmake tree
|
||||
|
||||
WORKDIR /uncrustify
|
||||
RUN curl -L https://github.com/uncrustify/uncrustify/archive/uncrustify-0.68.1.tar.gz | tar xz
|
||||
WORKDIR /uncrustify/uncrustify-uncrustify-0.68.1/
|
||||
RUN mkdir build
|
||||
WORKDIR /uncrustify/uncrustify-uncrustify-0.68.1/build/
|
||||
RUN cmake ..
|
||||
RUN MAKEFLAGS="-j $(nproc)" make -s
|
||||
|
||||
RUN make install DESTDIR=/uncrustify
|
||||
|
||||
# builder for all pipenv's to get them contained in a single layer
|
||||
FROM base AS pipenv
|
||||
|
||||
WORKDIR /workspaces/citus/
|
||||
|
||||
# tools to sync pgenv with vscode
|
||||
COPY --chown=citus:citus .vscode/Pipfile .vscode/Pipfile.lock .devcontainer/.vscode/
|
||||
RUN ( cd .devcontainer/.vscode && pipenv install )
|
||||
|
||||
# environment to run our failure tests
|
||||
COPY --chown=citus:citus src/ src/
|
||||
RUN ( cd src/test/regress && pipenv install )
|
||||
|
||||
# assemble the final container by copying over the artifacts from separately built containers
|
||||
FROM base AS devcontainer
|
||||
|
||||
LABEL org.opencontainers.image.source=https://github.com/citusdata/citus
|
||||
LABEL org.opencontainers.image.description="Development container for the Citus project"
|
||||
LABEL org.opencontainers.image.licenses=AGPL-3.0-only
|
||||
|
||||
RUN yes | sudo unminimize
|
||||
|
||||
# install developer productivity tools
|
||||
RUN sudo apt update \
|
||||
&& sudo apt install -y \
|
||||
autoconf2.69 \
|
||||
bash-completion \
|
||||
fswatch \
|
||||
gdb \
|
||||
htop \
|
||||
libdbd-pg-perl \
|
||||
libdbi-perl \
|
||||
lsof \
|
||||
man \
|
||||
net-tools \
|
||||
psmisc \
|
||||
pspg \
|
||||
tree \
|
||||
vim \
|
||||
&& sudo apt clean
|
||||
|
||||
# Since gdb will run in the context of the root user when debugging citus we will need to both
|
||||
# download the gdbpg.py script as the root user, into their home directory, as well as add .gdbinit
|
||||
# as a file owned by root
|
||||
# This will make that as soon as the debugger attaches to a postgres backend (or frankly any other process)
|
||||
# the gdbpg.py script will be sourced and the developer can directly use it.
|
||||
RUN sudo curl -o /root/gdbpg.py https://raw.githubusercontent.com/tvesely/gdbpg/6065eee7872457785f830925eac665aa535caf62/gdbpg.py
|
||||
COPY --chown=root:root .gdbinit /root/
|
||||
|
||||
# install developer dependencies in the global environment
|
||||
RUN --mount=type=bind,source=requirements.txt,target=requirements.txt pip install -r requirements.txt
|
||||
|
||||
# for persistent bash history across devcontainers we need to have
|
||||
# a) a directory to store the history in
|
||||
# b) a prompt command to append the history to the file
|
||||
# c) specify the history file to store the history in
|
||||
# b and c are done in the .bashrc to make it persistent across shells only
|
||||
RUN sudo install -d -o citus -g citus /commandhistory \
|
||||
&& echo "export PROMPT_COMMAND='history -a' && export HISTFILE=/commandhistory/.bash_history" >> "/home/citus/.bashrc"
|
||||
|
||||
# install citus-dev
|
||||
RUN git clone --branch develop https://github.com/citusdata/tools.git citus-tools \
|
||||
&& ( cd citus-tools/citus_dev && pipenv install ) \
|
||||
&& mkdir -p ~/.local/bin \
|
||||
&& ln -s /home/citus/citus-tools/citus_dev/citus_dev-pipenv .local/bin/citus_dev \
|
||||
&& sudo make -C citus-tools/uncrustify install bindir=/usr/local/bin pkgsysconfdir=/usr/local/etc/ \
|
||||
&& mkdir -p ~/.local/share/bash-completion/completions/ \
|
||||
&& ln -s ~/citus-tools/citus_dev/bash_completion ~/.local/share/bash-completion/completions/citus_dev
|
||||
|
||||
# TODO some LC_ALL errors, possibly solved by locale-gen
|
||||
RUN git clone https://github.com/so-fancy/diff-so-fancy.git \
|
||||
&& mkdir -p ~/.local/bin \
|
||||
&& ln -s /home/citus/diff-so-fancy/diff-so-fancy .local/bin/
|
||||
|
||||
COPY --link --from=uncrustify-builder /uncrustify/usr/ /usr/
|
||||
|
||||
COPY --link --from=pg15 /home/citus/.pgenv-staging/ /home/citus/.pgenv/
|
||||
COPY --link --from=pg16 /home/citus/.pgenv-staging/ /home/citus/.pgenv/
|
||||
COPY --link --from=pg17 /home/citus/.pgenv-staging/ /home/citus/.pgenv/
|
||||
|
||||
COPY --link --from=pipenv /home/citus/.local/share/virtualenvs/ /home/citus/.local/share/virtualenvs/
|
||||
|
||||
# place to run your cluster with citus_dev
|
||||
VOLUME /data
|
||||
RUN sudo mkdir /data \
|
||||
&& sudo chown citus:citus /data
|
||||
|
||||
COPY --chown=citus:citus .psqlrc .
|
||||
|
||||
# with the copy linking of layers github actions seem to misbehave with the ownership of the
|
||||
# directories leading up to the link, hence a small patch layer to set the right ownership
|
||||
RUN sudo chown --from=root:root citus:citus -R ~
|
||||
|
||||
# sets default pg version
|
||||
RUN pgenv switch 17.5
|
||||
|
||||
# make connecting to the coordinator easy
|
||||
ENV PGPORT=9700
|
|
@ -1,11 +0,0 @@
|
|||
|
||||
init: ../.vscode/c_cpp_properties.json ../.vscode/launch.json
|
||||
|
||||
../.vscode:
|
||||
mkdir -p ../.vscode
|
||||
|
||||
../.vscode/launch.json: ../.vscode .vscode/launch.json
|
||||
cp .vscode/launch.json ../.vscode/launch.json
|
||||
|
||||
../.vscode/c_cpp_properties.json: ../.vscode
|
||||
./.vscode/generate_c_cpp_properties-json.py ../.vscode/c_cpp_properties.json
|
|
@ -1,37 +0,0 @@
|
|||
{
|
||||
"image": "ghcr.io/citusdata/citus-devcontainer:main",
|
||||
"runArgs": [
|
||||
"--cap-add=SYS_PTRACE",
|
||||
"--ulimit=core=-1",
|
||||
],
|
||||
"forwardPorts": [
|
||||
9700
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"eamodio.gitlens",
|
||||
"GitHub.copilot-chat",
|
||||
"GitHub.copilot",
|
||||
"github.vscode-github-actions",
|
||||
"github.vscode-pull-request-github",
|
||||
"ms-vscode.cpptools-extension-pack",
|
||||
"ms-vsliveshare.vsliveshare",
|
||||
"rioj7.command-variable",
|
||||
],
|
||||
"settings": {
|
||||
"files.exclude": {
|
||||
"**/*.o": true,
|
||||
"**/.deps/": true,
|
||||
}
|
||||
},
|
||||
}
|
||||
},
|
||||
"mounts": [
|
||||
"type=volume,target=/data",
|
||||
"source=citus-bashhistory,target=/commandhistory,type=volume",
|
||||
],
|
||||
"updateContentCommand": "./configure",
|
||||
"postCreateCommand": "make -C .devcontainer/",
|
||||
}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
PGENV_MAKE_OPTIONS=(-s)
|
||||
|
||||
PGENV_CONFIGURE_OPTIONS=(
|
||||
--enable-debug
|
||||
--enable-depend
|
||||
--enable-cassert
|
||||
--enable-tap-tests
|
||||
'CFLAGS=-ggdb -Og -g3 -fno-omit-frame-pointer -DUSE_VALGRIND'
|
||||
--with-openssl
|
||||
--with-libxml
|
||||
--with-libxslt
|
||||
--with-uuid=e2fs
|
||||
--with-icu
|
||||
--with-lz4
|
||||
)
|
|
@ -1,9 +0,0 @@
|
|||
black==23.11.0
|
||||
click==8.1.7
|
||||
isort==5.12.0
|
||||
mypy-extensions==1.0.0
|
||||
packaging==23.2
|
||||
pathspec==0.11.2
|
||||
platformdirs==4.0.0
|
||||
tomli==2.0.1
|
||||
typing_extensions==4.8.0
|
|
@ -1,28 +0,0 @@
|
|||
[[source]]
|
||||
name = "pypi"
|
||||
url = "https://pypi.python.org/simple"
|
||||
verify_ssl = true
|
||||
|
||||
[packages]
|
||||
mitmproxy = {editable = true, ref = "main", git = "https://github.com/citusdata/mitmproxy.git"}
|
||||
construct = "*"
|
||||
docopt = "==0.6.2"
|
||||
cryptography = ">=41.0.4"
|
||||
pytest = "*"
|
||||
psycopg = "*"
|
||||
filelock = "*"
|
||||
pytest-asyncio = "*"
|
||||
pytest-timeout = "*"
|
||||
pytest-xdist = "*"
|
||||
pytest-repeat = "*"
|
||||
pyyaml = "*"
|
||||
werkzeug = "==2.3.7"
|
||||
|
||||
[dev-packages]
|
||||
black = "*"
|
||||
isort = "*"
|
||||
flake8 = "*"
|
||||
flake8-bugbear = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.9"
|
|
@ -17,7 +17,13 @@ trim_trailing_whitespace = true
|
|||
insert_final_newline = unset
|
||||
trim_trailing_whitespace = unset
|
||||
|
||||
[*.{sql,sh,py,toml}]
|
||||
# Don't change test/regress/output directory, this needs to be a separate rule
|
||||
# for some reason
|
||||
[/src/test/regress/output/**]
|
||||
insert_final_newline = unset
|
||||
trim_trailing_whitespace = unset
|
||||
|
||||
[*.{sql,sh,py}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
tab_width = 4
|
||||
|
|
7
.flake8
|
@ -1,7 +0,0 @@
|
|||
[flake8]
|
||||
# E203 is ignored for black
|
||||
extend-ignore = E203
|
||||
# black will truncate to 88 characters usually, but long string literals it
|
||||
# might keep. That's fine in most cases unless it gets really excessive.
|
||||
max-line-length = 150
|
||||
exclude = .git,__pycache__,vendor,tmp_*
|
|
@ -16,6 +16,7 @@ README.* conflict-marker-size=32
|
|||
|
||||
# Test output files that contain extra whitespace
|
||||
*.out -whitespace
|
||||
src/test/regress/output/*.source -whitespace
|
||||
|
||||
# These files are maintained or generated elsewhere. We take them as is.
|
||||
configure -whitespace
|
||||
|
@ -25,9 +26,10 @@ configure -whitespace
|
|||
|
||||
# except these exceptions...
|
||||
src/backend/distributed/utils/citus_outfuncs.c -citus-style
|
||||
src/backend/distributed/deparser/ruleutils_15.c -citus-style
|
||||
src/backend/distributed/deparser/ruleutils_16.c -citus-style
|
||||
src/backend/distributed/deparser/ruleutils_17.c -citus-style
|
||||
src/backend/distributed/utils/pg11_snprintf.c -citus-style
|
||||
src/backend/distributed/deparser/ruleutils_11.c -citus-style
|
||||
src/backend/distributed/deparser/ruleutils_12.c -citus-style
|
||||
src/backend/distributed/deparser/ruleutils_13.c -citus-style
|
||||
src/backend/distributed/commands/index_pg_source.c -citus-style
|
||||
|
||||
src/include/distributed/citus_nodes.h -citus-style
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
name: 'Parallelization matrix'
|
||||
inputs:
|
||||
count:
|
||||
required: false
|
||||
default: 32
|
||||
outputs:
|
||||
json:
|
||||
value: ${{ steps.generate_matrix.outputs.json }}
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Generate parallelization matrix
|
||||
id: generate_matrix
|
||||
shell: bash
|
||||
run: |-
|
||||
json_array="{\"include\": ["
|
||||
for ((i = 1; i <= ${{ inputs.count }}; i++)); do
|
||||
json_array+="{\"id\":\"$i\"},"
|
||||
done
|
||||
json_array=${json_array%,}
|
||||
json_array+=" ]}"
|
||||
echo "json=$json_array" >> "$GITHUB_OUTPUT"
|
||||
echo "json=$json_array"
|
|
@ -1,38 +0,0 @@
|
|||
name: save_logs_and_results
|
||||
inputs:
|
||||
folder:
|
||||
required: false
|
||||
default: "log"
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: actions/upload-artifact@v4.6.0
|
||||
name: Upload logs
|
||||
with:
|
||||
name: ${{ inputs.folder }}
|
||||
if-no-files-found: ignore
|
||||
path: |
|
||||
src/test/**/proxy.output
|
||||
src/test/**/results/
|
||||
src/test/**/tmp_check/master/log
|
||||
src/test/**/tmp_check/worker.57638/log
|
||||
src/test/**/tmp_check/worker.57637/log
|
||||
src/test/**/*.diffs
|
||||
src/test/**/out/ddls.sql
|
||||
src/test/**/out/queries.sql
|
||||
src/test/**/logfile_*
|
||||
/tmp/pg_upgrade_newData_logs
|
||||
- name: Publish regression.diffs
|
||||
run: |-
|
||||
diffs="$(find src/test/regress -name "*.diffs" -exec cat {} \;)"
|
||||
if ! [ -z "$diffs" ]; then
|
||||
echo '```diff' >> $GITHUB_STEP_SUMMARY
|
||||
echo -E "$diffs" >> $GITHUB_STEP_SUMMARY
|
||||
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||
# Quote the expansion so the diff keeps its line breaks and is not glob-expanded.
echo -E "$diffs"
|
||||
fi
|
||||
shell: bash
|
||||
- name: Print stack traces
|
||||
run: "./ci/print_stack_trace.sh"
|
||||
if: failure()
|
||||
shell: bash
|
|
@ -1,35 +0,0 @@
|
|||
name: setup_extension
|
||||
inputs:
|
||||
pg_major:
|
||||
required: false
|
||||
skip_installation:
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Expose $PG_MAJOR to Github Env
|
||||
run: |-
|
||||
if [ -z "${{ inputs.pg_major }}" ]; then
|
||||
echo "PG_MAJOR=${PG_MAJOR}" >> $GITHUB_ENV
|
||||
else
|
||||
echo "PG_MAJOR=${{ inputs.pg_major }}" >> $GITHUB_ENV
|
||||
fi
|
||||
shell: bash
|
||||
- uses: actions/download-artifact@v4.1.8
|
||||
with:
|
||||
name: build-${{ env.PG_MAJOR }}
|
||||
- name: Install Extension
|
||||
if: ${{ inputs.skip_installation == 'false' }}
|
||||
run: tar xfv "install-$PG_MAJOR.tar" --directory /
|
||||
shell: bash
|
||||
- name: Configure
|
||||
run: |-
|
||||
chown -R circleci .
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
gosu circleci ./configure --without-pg-version-check
|
||||
shell: bash
|
||||
- name: Enable core dumps
|
||||
run: ulimit -c unlimited
|
||||
shell: bash
|
|
@ -1,27 +0,0 @@
|
|||
name: coverage
|
||||
inputs:
|
||||
flags:
|
||||
required: false
|
||||
codecov_token:
|
||||
required: true
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: codecov/codecov-action@v3
|
||||
with:
|
||||
flags: ${{ inputs.flags }}
|
||||
token: ${{ inputs.codecov_token }}
|
||||
verbose: true
|
||||
gcov: true
|
||||
- name: Create codeclimate coverage
|
||||
run: |-
|
||||
lcov --directory . --capture --output-file lcov.info
|
||||
lcov --remove lcov.info -o lcov.info '/usr/*'
|
||||
sed "s=^SF:$PWD/=SF:=g" -i lcov.info # relative paths are required by codeclimate
|
||||
mkdir -p /tmp/codeclimate
|
||||
cc-test-reporter format-coverage -t lcov -o /tmp/codeclimate/${{ inputs.flags }}.json lcov.info
|
||||
shell: bash
|
||||
- uses: actions/upload-artifact@v4.6.0
|
||||
with:
|
||||
path: "/tmp/codeclimate/*.json"
|
||||
name: codeclimate-${{ inputs.flags }}
|
|
@ -1,3 +0,0 @@
|
|||
base:
|
||||
- ".* warning: ignoring old recipe for target [`']check'"
|
||||
- ".* warning: overriding recipe for target [`']check'"
|
|
@ -1,51 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
# Function to get the OS version
|
||||
get_rpm_os_version() {
|
||||
if [[ -f /etc/centos-release ]]; then
|
||||
cat /etc/centos-release | awk '{print $4}'
|
||||
elif [[ -f /etc/oracle-release ]]; then
|
||||
cat /etc/oracle-release | awk '{print $5}'
|
||||
else
|
||||
echo "Unknown"
|
||||
fi
|
||||
}
|
||||
|
||||
package_type=${1}
|
||||
|
||||
# Since $HOME is set in GH_Actions as /github/home, pyenv fails to create virtualenvs.
|
||||
# For this script, we set $HOME to /root and then set it back to /github/home.
|
||||
GITHUB_HOME="${HOME}"
|
||||
export HOME="/root"
|
||||
|
||||
eval "$(pyenv init -)"
|
||||
pyenv versions
|
||||
pyenv virtualenv ${PACKAGING_PYTHON_VERSION} packaging_env
|
||||
pyenv activate packaging_env
|
||||
|
||||
git clone -b v0.8.27 --depth=1 https://github.com/citusdata/tools.git tools
|
||||
python3 -m pip install -r tools/packaging_automation/requirements.txt
|
||||
|
||||
|
||||
echo "Package type: ${package_type}"
|
||||
echo "OS version: $(get_rpm_os_version)"
|
||||
|
||||
# For RHEL 7, we need to install urllib3<2 due to below execution error
|
||||
# ImportError: urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl'
|
||||
# module is compiled with 'OpenSSL 1.0.2k-fips 26 Jan 2017'.
|
||||
# See: https://github.com/urllib3/urllib3/issues/2168
|
||||
if [[ ${package_type} == "rpm" && $(get_rpm_os_version) == 7* ]]; then
|
||||
python3 -m pip uninstall -y urllib3
|
||||
python3 -m pip install 'urllib3<2'
|
||||
fi
|
||||
|
||||
python3 -m tools.packaging_automation.validate_build_output --output_file output.log \
|
||||
--ignore_file .github/packaging/packaging_ignore.yml \
|
||||
--package_type ${package_type}
|
||||
pyenv deactivate
|
||||
# Set $HOME back to /github/home
|
||||
export HOME=${GITHUB_HOME}
|
||||
|
||||
# Print the output to the console
|
|
@ -1,545 +0,0 @@
|
|||
name: Build & Test
|
||||
run-name: Build & Test - ${{ github.event.pull_request.title || github.ref_name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
skip_test_flakyness:
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
push:
|
||||
branches:
|
||||
- "main"
|
||||
- "release-*"
|
||||
pull_request:
|
||||
types: [opened, reopened, synchronize]
|
||||
merge_group:
|
||||
jobs:
|
||||
# Since GHA does not interpolate env variables in matrix context, we need to
|
||||
# define them in a separate job and use them in other jobs.
|
||||
params:
|
||||
runs-on: ubuntu-latest
|
||||
name: Initialize parameters
|
||||
outputs:
|
||||
build_image_name: "ghcr.io/citusdata/extbuilder"
|
||||
test_image_name: "ghcr.io/citusdata/exttester"
|
||||
citusupgrade_image_name: "ghcr.io/citusdata/citusupgradetester"
|
||||
fail_test_image_name: "ghcr.io/citusdata/failtester"
|
||||
pgupgrade_image_name: "ghcr.io/citusdata/pgupgradetester"
|
||||
style_checker_image_name: "ghcr.io/citusdata/stylechecker"
|
||||
style_checker_tools_version: "0.8.18"
|
||||
sql_snapshot_pg_version: "17.5"
|
||||
image_suffix: "-dev-d28f316"
|
||||
pg15_version: '{ "major": "15", "full": "15.13" }'
|
||||
pg16_version: '{ "major": "16", "full": "16.9" }'
|
||||
pg17_version: '{ "major": "17", "full": "17.5" }'
|
||||
upgrade_pg_versions: "15.13-16.9-17.5"
|
||||
steps:
|
||||
# Since GHA jobs need at least one step we use a noop step here.
|
||||
- name: Set up parameters
|
||||
run: echo 'noop'
|
||||
check-sql-snapshots:
|
||||
needs: params
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ needs.params.outputs.build_image_name }}:${{ needs.params.outputs.sql_snapshot_pg_version }}${{ needs.params.outputs.image_suffix }}
|
||||
options: --user root
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Check Snapshots
|
||||
run: |
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
ci/check_sql_snapshots.sh
|
||||
check-style:
|
||||
needs: params
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ needs.params.outputs.style_checker_image_name }}:${{ needs.params.outputs.style_checker_tools_version }}${{ needs.params.outputs.image_suffix }}
|
||||
steps:
|
||||
- name: Mark workspace as a safe git directory
|
||||
run: |
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Check C Style
|
||||
run: citus_indent --check
|
||||
- name: Check Python style
|
||||
run: black --check .
|
||||
- name: Check Python import order
|
||||
run: isort --check .
|
||||
- name: Check Python lints
|
||||
run: flake8 .
|
||||
- name: Fix whitespace
|
||||
run: ci/editorconfig.sh && git diff --exit-code
|
||||
- name: Remove useless declarations
|
||||
run: ci/remove_useless_declarations.sh && git diff --cached --exit-code
|
||||
- name: Sort and group includes
|
||||
run: ci/sort_and_group_includes.sh && git diff --exit-code
|
||||
- name: Normalize test output
|
||||
run: ci/normalize_expected.sh && git diff --exit-code
|
||||
- name: Check for C-style comments in migration files
|
||||
run: ci/disallow_c_comments_in_migrations.sh && git diff --exit-code
|
||||
- name: 'Check for comments that start with # character in spec files'
|
||||
run: ci/disallow_hash_comments_in_spec_files.sh && git diff --exit-code
|
||||
- name: Check for gitignore entries for source files
|
||||
run: ci/fix_gitignore.sh && git diff --exit-code
|
||||
- name: Check for lengths of changelog entries
|
||||
run: ci/disallow_long_changelog_entries.sh
|
||||
- name: Check for banned C API usage
|
||||
run: ci/banned.h.sh
|
||||
- name: Check for tests missing in schedules
|
||||
run: ci/check_all_tests_are_run.sh
|
||||
- name: Check if all CI scripts are actually run
|
||||
run: ci/check_all_ci_scripts_are_run.sh
|
||||
- name: Check if all GUCs are sorted alphabetically
|
||||
run: ci/check_gucs_are_alphabetically_sorted.sh
|
||||
- name: Check for missing downgrade scripts
|
||||
run: ci/check_migration_files.sh
|
||||
build:
|
||||
needs: params
|
||||
name: Build for PG${{ fromJson(matrix.pg_version).major }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
image_name:
|
||||
- ${{ needs.params.outputs.build_image_name }}
|
||||
image_suffix:
|
||||
- ${{ needs.params.outputs.image_suffix}}
|
||||
pg_version:
|
||||
- ${{ needs.params.outputs.pg15_version }}
|
||||
- ${{ needs.params.outputs.pg16_version }}
|
||||
- ${{ needs.params.outputs.pg17_version }}
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ matrix.image_suffix }}"
|
||||
options: --user root
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Expose $PG_MAJOR to Github Env
|
||||
run: echo "PG_MAJOR=${PG_MAJOR}" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
- name: Build
|
||||
run: "./ci/build-citus.sh"
|
||||
shell: bash
|
||||
- uses: actions/upload-artifact@v4.6.0
|
||||
with:
|
||||
name: build-${{ env.PG_MAJOR }}
|
||||
path: |-
|
||||
./build-${{ env.PG_MAJOR }}/*
|
||||
./install-${{ env.PG_MAJOR }}.tar
|
||||
test-citus:
|
||||
name: PG${{ fromJson(matrix.pg_version).major }} - ${{ matrix.make }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
suite:
|
||||
- regress
|
||||
image_name:
|
||||
- ${{ needs.params.outputs.test_image_name }}
|
||||
pg_version:
|
||||
- ${{ needs.params.outputs.pg15_version }}
|
||||
- ${{ needs.params.outputs.pg16_version }}
|
||||
- ${{ needs.params.outputs.pg17_version }}
|
||||
make:
|
||||
- check-split
|
||||
- check-multi
|
||||
- check-multi-1
|
||||
- check-multi-mx
|
||||
- check-vanilla
|
||||
- check-isolation
|
||||
- check-operations
|
||||
- check-follower-cluster
|
||||
- check-columnar
|
||||
- check-columnar-isolation
|
||||
- check-enterprise
|
||||
- check-enterprise-isolation
|
||||
- check-enterprise-isolation-logicalrep-1
|
||||
- check-enterprise-isolation-logicalrep-2
|
||||
- check-enterprise-isolation-logicalrep-3
|
||||
include:
|
||||
- make: check-failure
|
||||
pg_version: ${{ needs.params.outputs.pg15_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-failure
|
||||
pg_version: ${{ needs.params.outputs.pg16_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-failure
|
||||
pg_version: ${{ needs.params.outputs.pg17_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-enterprise-failure
|
||||
pg_version: ${{ needs.params.outputs.pg15_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-enterprise-failure
|
||||
pg_version: ${{ needs.params.outputs.pg16_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-enterprise-failure
|
||||
pg_version: ${{ needs.params.outputs.pg17_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-pytest
|
||||
pg_version: ${{ needs.params.outputs.pg15_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-pytest
|
||||
pg_version: ${{ needs.params.outputs.pg16_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-pytest
|
||||
pg_version: ${{ needs.params.outputs.pg17_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: installcheck
|
||||
suite: cdc
|
||||
image_name: ${{ needs.params.outputs.test_image_name }}
|
||||
pg_version: ${{ needs.params.outputs.pg15_version }}
|
||||
- make: installcheck
|
||||
suite: cdc
|
||||
image_name: ${{ needs.params.outputs.test_image_name }}
|
||||
pg_version: ${{ needs.params.outputs.pg16_version }}
|
||||
- make: installcheck
|
||||
suite: cdc
|
||||
image_name: ${{ needs.params.outputs.test_image_name }}
|
||||
pg_version: ${{ needs.params.outputs.pg17_version }}
|
||||
- make: check-query-generator
|
||||
pg_version: ${{ needs.params.outputs.pg15_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-query-generator
|
||||
pg_version: ${{ needs.params.outputs.pg16_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
- make: check-query-generator
|
||||
pg_version: ${{ needs.params.outputs.pg17_version }}
|
||||
suite: regress
|
||||
image_name: ${{ needs.params.outputs.fail_test_image_name }}
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ needs.params.outputs.image_suffix }}"
|
||||
options: --user root --dns=8.8.8.8
|
||||
# Because GitHub creates a default network for each job, we need to use
|
||||
# --dns= to have similar DNS settings as our other CI systems or local
|
||||
# machines. Otherwise, we may see different results.
|
||||
needs:
|
||||
- params
|
||||
- build
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: "./.github/actions/setup_extension"
|
||||
- name: Run Test
|
||||
run: gosu circleci make -C src/test/${{ matrix.suite }} ${{ matrix.make }}
|
||||
timeout-minutes: 20
|
||||
- uses: "./.github/actions/save_logs_and_results"
|
||||
if: always()
|
||||
with:
|
||||
folder: ${{ fromJson(matrix.pg_version).major }}_${{ matrix.make }}
|
||||
- uses: "./.github/actions/upload_coverage"
|
||||
if: always()
|
||||
with:
|
||||
flags: ${{ env.PG_MAJOR }}_${{ matrix.suite }}_${{ matrix.make }}
|
||||
codecov_token: ${{ secrets.CODECOV_TOKEN }}
|
||||
test-arbitrary-configs:
|
||||
name: PG${{ fromJson(matrix.pg_version).major }} - check-arbitrary-configs-${{ matrix.parallel }}
|
||||
runs-on: ["self-hosted", "1ES.Pool=1es-gha-citusdata-pool"]
|
||||
container:
|
||||
image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ needs.params.outputs.image_suffix }}"
|
||||
options: --user root
|
||||
needs:
|
||||
- params
|
||||
- build
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
image_name:
|
||||
- ${{ needs.params.outputs.fail_test_image_name }}
|
||||
pg_version:
|
||||
- ${{ needs.params.outputs.pg15_version }}
|
||||
- ${{ needs.params.outputs.pg16_version }}
|
||||
- ${{ needs.params.outputs.pg17_version }}
|
||||
parallel: [0,1,2,3,4,5] # workaround for running 6 parallel jobs
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: "./.github/actions/setup_extension"
|
||||
- name: Test arbitrary configs
|
||||
run: |-
|
||||
# we use parallel jobs to split the tests into 6 parts and run them in parallel
|
||||
# the script below extracts the tests for the current job
|
||||
N=6 # Total number of jobs (see matrix.parallel)
|
||||
X=${{ matrix.parallel }} # Current job number
|
||||
TESTS=$(src/test/regress/citus_tests/print_test_names.py |
|
||||
tr '\n' ',' | awk -v N="$N" -v X="$X" -F, '{
|
||||
split("", parts)
|
||||
for (i = 1; i <= NF; i++) {
|
||||
parts[i % N] = parts[i % N] $i ","
|
||||
}
|
||||
print substr(parts[X], 1, length(parts[X])-1)
|
||||
}')
|
||||
echo $TESTS
|
||||
gosu circleci \
|
||||
make -C src/test/regress \
|
||||
check-arbitrary-configs parallel=4 CONFIGS=$TESTS
|
||||
- uses: "./.github/actions/save_logs_and_results"
|
||||
if: always()
|
||||
with:
|
||||
folder: ${{ env.PG_MAJOR }}_arbitrary_configs_${{ matrix.parallel }}
|
||||
- uses: "./.github/actions/upload_coverage"
|
||||
if: always()
|
||||
with:
|
||||
flags: ${{ env.PG_MAJOR }}_arbitrary_configs_${{ matrix.parallel }}
|
||||
codecov_token: ${{ secrets.CODECOV_TOKEN }}
|
||||
test-pg-upgrade:
|
||||
name: PG${{ matrix.old_pg_major }}-PG${{ matrix.new_pg_major }} - check-pg-upgrade
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: "${{ needs.params.outputs.pgupgrade_image_name }}:${{ needs.params.outputs.upgrade_pg_versions }}${{ needs.params.outputs.image_suffix }}"
|
||||
options: --user root
|
||||
needs:
|
||||
- params
|
||||
- build
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- old_pg_major: 15
|
||||
new_pg_major: 16
|
||||
- old_pg_major: 16
|
||||
new_pg_major: 17
|
||||
- old_pg_major: 15
|
||||
new_pg_major: 17
|
||||
env:
|
||||
old_pg_major: ${{ matrix.old_pg_major }}
|
||||
new_pg_major: ${{ matrix.new_pg_major }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: "./.github/actions/setup_extension"
|
||||
with:
|
||||
pg_major: "${{ env.old_pg_major }}"
|
||||
- uses: "./.github/actions/setup_extension"
|
||||
with:
|
||||
pg_major: "${{ env.new_pg_major }}"
|
||||
- name: Install and test postgres upgrade
|
||||
run: |-
|
||||
gosu circleci \
|
||||
make -C src/test/regress \
|
||||
check-pg-upgrade \
|
||||
old-bindir=/usr/lib/postgresql/${{ env.old_pg_major }}/bin \
|
||||
new-bindir=/usr/lib/postgresql/${{ env.new_pg_major }}/bin
|
||||
- name: Copy pg_upgrade logs for newData dir
|
||||
run: |-
|
||||
mkdir -p /tmp/pg_upgrade_newData_logs
|
||||
if ls src/test/regress/tmp_upgrade/newData/*.log 1> /dev/null 2>&1; then
|
||||
cp src/test/regress/tmp_upgrade/newData/*.log /tmp/pg_upgrade_newData_logs
|
||||
fi
|
||||
if: failure()
|
||||
- uses: "./.github/actions/save_logs_and_results"
|
||||
if: always()
|
||||
with:
|
||||
folder: ${{ env.old_pg_major }}_${{ env.new_pg_major }}_upgrade
|
||||
- uses: "./.github/actions/upload_coverage"
|
||||
if: always()
|
||||
with:
|
||||
flags: ${{ env.old_pg_major }}_${{ env.new_pg_major }}_upgrade
|
||||
codecov_token: ${{ secrets.CODECOV_TOKEN }}
|
||||
test-citus-upgrade:
|
||||
name: PG${{ fromJson(needs.params.outputs.pg15_version).major }} - check-citus-upgrade
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: "${{ needs.params.outputs.citusupgrade_image_name }}:${{ fromJson(needs.params.outputs.pg15_version).full }}${{ needs.params.outputs.image_suffix }}"
|
||||
options: --user root
|
||||
needs:
|
||||
- params
|
||||
- build
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: "./.github/actions/setup_extension"
|
||||
with:
|
||||
skip_installation: true
|
||||
- name: Install and test citus upgrade
|
||||
run: |-
|
||||
# run make check-citus-upgrade for all citus versions
|
||||
# the image has ${CITUS_VERSIONS} set with all versions it contains the binaries of
|
||||
for citus_version in ${CITUS_VERSIONS}; do \
|
||||
gosu circleci \
|
||||
make -C src/test/regress \
|
||||
check-citus-upgrade \
|
||||
bindir=/usr/lib/postgresql/${PG_MAJOR}/bin \
|
||||
citus-old-version=${citus_version} \
|
||||
citus-pre-tar=/install-pg${PG_MAJOR}-citus${citus_version}.tar \
|
||||
citus-post-tar=${GITHUB_WORKSPACE}/install-$PG_MAJOR.tar; \
|
||||
done;
|
||||
# run make check-citus-upgrade-mixed for all citus versions
|
||||
# the image has ${CITUS_VERSIONS} set with all versions it contains the binaries of
|
||||
for citus_version in ${CITUS_VERSIONS}; do \
|
||||
gosu circleci \
|
||||
make -C src/test/regress \
|
||||
check-citus-upgrade-mixed \
|
||||
citus-old-version=${citus_version} \
|
||||
bindir=/usr/lib/postgresql/${PG_MAJOR}/bin \
|
||||
citus-pre-tar=/install-pg${PG_MAJOR}-citus${citus_version}.tar \
|
||||
citus-post-tar=${GITHUB_WORKSPACE}/install-$PG_MAJOR.tar; \
|
||||
done;
|
||||
- uses: "./.github/actions/save_logs_and_results"
|
||||
if: always()
|
||||
with:
|
||||
folder: ${{ env.PG_MAJOR }}_citus_upgrade
|
||||
- uses: "./.github/actions/upload_coverage"
|
||||
if: always()
|
||||
with:
|
||||
flags: ${{ env.PG_MAJOR }}_citus_upgrade
|
||||
codecov_token: ${{ secrets.CODECOV_TOKEN }}
|
||||
upload-coverage:
|
||||
if: always()
|
||||
env:
|
||||
CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ needs.params.outputs.test_image_name }}:${{ fromJson(needs.params.outputs.pg17_version).full }}${{ needs.params.outputs.image_suffix }}
|
||||
needs:
|
||||
- params
|
||||
- test-citus
|
||||
- test-arbitrary-configs
|
||||
- test-citus-upgrade
|
||||
- test-pg-upgrade
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4.1.8
|
||||
with:
|
||||
pattern: codeclimate*
|
||||
path: codeclimate
|
||||
merge-multiple: true
|
||||
- name: Upload coverage results to Code Climate
|
||||
run: |-
|
||||
cc-test-reporter sum-coverage codeclimate/*.json -o total.json
|
||||
cc-test-reporter upload-coverage -i total.json
|
||||
ch_benchmark:
|
||||
name: CH Benchmark
|
||||
if: startsWith(github.ref, 'refs/heads/ch_benchmark/')
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- build
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{ secrets.AZURE_CREDENTIALS }}
|
||||
- name: install dependencies and run ch_benchmark tests
|
||||
uses: azure/CLI@v1
|
||||
with:
|
||||
inlineScript: |
|
||||
cd ./src/test/hammerdb
|
||||
chmod +x run_hammerdb.sh
|
||||
# Invoke via ./ because the current directory is not on PATH by default.
./run_hammerdb.sh citusbot_ch_benchmark_rg
|
||||
tpcc_benchmark:
|
||||
name: TPCC Benchmark
|
||||
if: startsWith(github.ref, 'refs/heads/tpcc_benchmark/')
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- build
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{ secrets.AZURE_CREDENTIALS }}
|
||||
- name: install dependencies and run tpcc_benchmark tests
|
||||
uses: azure/CLI@v1
|
||||
with:
|
||||
inlineScript: |
|
||||
cd ./src/test/hammerdb
|
||||
chmod +x run_hammerdb.sh
|
||||
# Invoke via ./ because the current directory is not on PATH by default.
./run_hammerdb.sh citusbot_tpcc_benchmark_rg
|
||||
prepare_parallelization_matrix_32:
|
||||
name: Prepare parallelization matrix
|
||||
if: ${{ needs.test-flakyness-pre.outputs.tests != ''}}
|
||||
needs: test-flakyness-pre
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
json: ${{ steps.parallelization.outputs.json }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: "./.github/actions/parallelization"
|
||||
id: parallelization
|
||||
with:
|
||||
count: 32
|
||||
test-flakyness-pre:
|
||||
name: Detect regression tests need to be ran
|
||||
# Keep the expression delimiters balanced: any stray character outside ${{ }}
# turns the condition into a non-empty string, which is always truthy.
if: ${{ !inputs.skip_test_flakyness }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
outputs:
|
||||
tests: ${{ steps.detect-regression-tests.outputs.tests }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Detect regression tests need to be ran
|
||||
id: detect-regression-tests
|
||||
run: |-
|
||||
detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*\.sql\|src/test/regress/spec/.*\.spec\|src/test/regress/citus_tests/test/test_.*\.py' || true))
|
||||
tests=${detected_changes}
|
||||
|
||||
# split the tests to be skipped --today we only skip upgrade tests
|
||||
skipped_tests=""
|
||||
not_skipped_tests=""
|
||||
for test in $tests; do
|
||||
if [[ $test =~ ^src/test/regress/sql/upgrade_ ]]; then
|
||||
skipped_tests="$skipped_tests $test"
|
||||
else
|
||||
not_skipped_tests="$not_skipped_tests $test"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ ! -z "$skipped_tests" ]; then
|
||||
echo "Skipped tests " $skipped_tests
|
||||
fi
|
||||
|
||||
if [ -z "$not_skipped_tests" ]; then
|
||||
echo "Not detected any tests that flaky test detection should run"
|
||||
else
|
||||
echo "Detected tests " $not_skipped_tests
|
||||
fi
|
||||
|
||||
echo 'tests<<EOF' >> $GITHUB_OUTPUT
|
||||
echo "$not_skipped_tests" >> "$GITHUB_OUTPUT"
|
||||
echo 'EOF' >> $GITHUB_OUTPUT
|
||||
test-flakyness:
|
||||
if: ${{ needs.test-flakyness-pre.outputs.tests != ''}}
|
||||
name: Test flakyness
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ needs.params.outputs.fail_test_image_name }}:${{ fromJson(needs.params.outputs.pg17_version).full }}${{ needs.params.outputs.image_suffix }}
|
||||
options: --user root
|
||||
env:
|
||||
runs: 8
|
||||
needs:
|
||||
- params
|
||||
- build
|
||||
- test-flakyness-pre
|
||||
- prepare_parallelization_matrix_32
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.prepare_parallelization_matrix_32.outputs.json) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4.1.8
|
||||
- uses: "./.github/actions/setup_extension"
|
||||
- name: Run minimal tests
|
||||
run: |-
|
||||
tests="${{ needs.test-flakyness-pre.outputs.tests }}"
|
||||
tests_array=($tests)
|
||||
for test in "${tests_array[@]}"
|
||||
do
|
||||
test_name=$(echo "$test" | sed -r "s/.+\/(.+)\..+/\1/")
|
||||
gosu circleci src/test/regress/citus_tests/run_test.py $test_name --repeat ${{ env.runs }} --use-whole-schedule-line
|
||||
done
|
||||
shell: bash
|
||||
- uses: "./.github/actions/save_logs_and_results"
|
||||
if: always()
|
||||
with:
|
||||
folder: test_flakyness_parallel_${{ matrix.id }}
|
|
@ -1,79 +0,0 @@
|
|||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '59 23 * * 6'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'cpp', 'python']
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
|
||||
- name: Install package dependencies
|
||||
run: |
|
||||
# Create the file repository configuration:
|
||||
sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main 15" > /etc/apt/sources.list.d/pgdg.list'
|
||||
# Import the repository signing key:
|
||||
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
autotools-dev \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
curl \
|
||||
debhelper \
|
||||
devscripts \
|
||||
fakeroot \
|
||||
flex \
|
||||
libcurl4-openssl-dev \
|
||||
libdistro-info-perl \
|
||||
libedit-dev \
|
||||
libfile-fcntllock-perl \
|
||||
libicu-dev \
|
||||
libkrb5-dev \
|
||||
liblz4-1 \
|
||||
liblz4-dev \
|
||||
libpam0g-dev \
|
||||
libreadline-dev \
|
||||
libselinux1-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
libzstd-dev \
|
||||
libzstd1 \
|
||||
lintian \
|
||||
postgresql-server-dev-15 \
|
||||
postgresql-server-dev-all \
|
||||
python3-pip \
|
||||
python3-setuptools \
|
||||
wget \
|
||||
zlib1g-dev
|
||||
|
||||
|
||||
- name: Configure, Build and Install Citus
|
||||
if: matrix.language == 'cpp'
|
||||
run: |
|
||||
./configure
|
||||
make -sj8
|
||||
sudo make install-all
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
|
@ -1,54 +0,0 @@
|
|||
name: "Build devcontainer"
|
||||
|
||||
# Since building of containers can be quite time consuming, and take up some storage,
|
||||
# there is no need to finish a build for a tag if new changes are concurrently being made.
|
||||
# This cancels any previous builds for the same tag, and only the latest one will be kept.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- ".devcontainer/**"
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
attestations: write
|
||||
id-token: write
|
||||
steps:
|
||||
-
|
||||
name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
ghcr.io/citusdata/citus-devcontainer
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=sha
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: 'Login to GitHub Container Registry'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{github.actor}}
|
||||
password: ${{secrets.GITHUB_TOKEN}}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: "{{defaultContext}}:.devcontainer"
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
|
@ -1,79 +0,0 @@
|
|||
name: Flaky test debugging
|
||||
run-name: Flaky test debugging - ${{ inputs.flaky_test }} (${{ inputs.flaky_test_runs_per_job }}x${{ inputs.flaky_test_parallel_jobs }})
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
flaky_test:
|
||||
required: true
|
||||
type: string
|
||||
description: Test to run
|
||||
flaky_test_runs_per_job:
|
||||
required: false
|
||||
default: 8
|
||||
type: number
|
||||
description: Number of times to run the test
|
||||
flaky_test_parallel_jobs:
|
||||
required: false
|
||||
default: 32
|
||||
type: number
|
||||
description: Number of parallel jobs to run
|
||||
jobs:
|
||||
build:
|
||||
name: Build Citus
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ vars.build_image_name }}:${{ vars.pg15_version }}${{ vars.image_suffix }}
|
||||
options: --user root
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Configure, Build, and Install
|
||||
run: |
|
||||
echo "PG_MAJOR=${PG_MAJOR}" >> $GITHUB_ENV
|
||||
./ci/build-citus.sh
|
||||
shell: bash
|
||||
- uses: actions/upload-artifact@v4.6.0
|
||||
with:
|
||||
name: build-${{ env.PG_MAJOR }}
|
||||
path: |-
|
||||
./build-${{ env.PG_MAJOR }}/*
|
||||
./install-${{ env.PG_MAJOR }}.tar
|
||||
prepare_parallelization_matrix:
|
||||
name: Prepare parallelization matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
json: ${{ steps.parallelization.outputs.json }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: "./.github/actions/parallelization"
|
||||
id: parallelization
|
||||
with:
|
||||
count: ${{ inputs.flaky_test_parallel_jobs }}
|
||||
test_flakyness:
|
||||
name: Test flakyness
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ vars.fail_test_image_name }}:${{ vars.pg15_version }}${{ vars.image_suffix }}
|
||||
options: --user root
|
||||
needs:
|
||||
[build, prepare_parallelization_matrix]
|
||||
env:
|
||||
test: "${{ inputs.flaky_test }}"
|
||||
runs: "${{ inputs.flaky_test_runs_per_job }}"
|
||||
skip: false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.prepare_parallelization_matrix.outputs.json) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: "./.github/actions/setup_extension"
|
||||
- name: Run minimal tests
|
||||
run: |-
|
||||
gosu circleci src/test/regress/citus_tests/run_test.py ${{ env.test }} --repeat ${{ env.runs }} --use-whole-schedule-line
|
||||
shell: bash
|
||||
- uses: "./.github/actions/save_logs_and_results"
|
||||
if: always()
|
||||
with:
|
||||
folder: check_flakyness_parallel_${{ matrix.id }}
|
|
@ -1,177 +0,0 @@
|
|||
name: Build tests in packaging images
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, reopened,synchronize]
|
||||
merge_group:
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
get_postgres_versions_from_file:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
pg_versions: ${{ steps.get-postgres-versions.outputs.pg_versions }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
- name: Get Postgres Versions
|
||||
id: get-postgres-versions
|
||||
run: |
|
||||
set -euxo pipefail
|
||||
# Postgres versions are stored in .github/workflows/build_and_test.yml
|
||||
# file in json strings with major and full keys.
|
||||
# Below command extracts the versions and get the unique values.
|
||||
pg_versions=$(cat .github/workflows/build_and_test.yml | grep -oE '"major": "[0-9]+", "full": "[0-9.]+"' | sed -E 's/"major": "([0-9]+)", "full": "([0-9.]+)"/\1/g' | sort | uniq | tr '\n', ',')
|
||||
pg_versions_array="[ ${pg_versions} ]"
|
||||
echo "Supported PG Versions: ${pg_versions_array}"
|
||||
# Below line is needed to set the output variable to be used in the next job
|
||||
echo "pg_versions=${pg_versions_array}" >> $GITHUB_OUTPUT
|
||||
shell: bash
|
||||
rpm_build_tests:
|
||||
name: rpm_build_tests
|
||||
needs: get_postgres_versions_from_file
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# We use separate images for different Postgres versions in rpm
|
||||
# based distros
|
||||
# For this reason, we need to use a "matrix" to generate names of
|
||||
# rpm images, e.g. citus/packaging:centos-7-pg12
|
||||
packaging_docker_image:
|
||||
- oraclelinux-8
|
||||
- almalinux-8
|
||||
- almalinux-9
|
||||
POSTGRES_VERSION: ${{ fromJson(needs.get_postgres_versions_from_file.outputs.pg_versions) }}
|
||||
|
||||
container:
|
||||
image: citus/packaging:${{ matrix.packaging_docker_image }}-pg${{ matrix.POSTGRES_VERSION }}
|
||||
options: --user root
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set Postgres and python parameters for rpm based distros
|
||||
run: |
|
||||
echo "/usr/pgsql-${{ matrix.POSTGRES_VERSION }}/bin" >> $GITHUB_PATH
|
||||
echo "/root/.pyenv/bin:$PATH" >> $GITHUB_PATH
|
||||
echo "PACKAGING_PYTHON_VERSION=3.8.16" >> $GITHUB_ENV
|
||||
|
||||
- name: Configure
|
||||
run: |
|
||||
echo "Current Shell:$0"
|
||||
echo "GCC Version: $(gcc --version)"
|
||||
./configure 2>&1 | tee output.log
|
||||
|
||||
- name: Make clean
|
||||
run: |
|
||||
make clean
|
||||
|
||||
- name: Make
|
||||
run: |
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
make CFLAGS="-Wno-missing-braces" -sj$(cat /proc/cpuinfo | grep "core id" | wc -l) 2>&1 | tee -a output.log
|
||||
|
||||
# Check the exit code of the make command
|
||||
make_exit_code=${PIPESTATUS[0]}
|
||||
|
||||
# If the make command returned a non-zero exit code, exit with the same code
|
||||
if [[ $make_exit_code -ne 0 ]]; then
|
||||
echo "make command failed with exit code $make_exit_code"
|
||||
exit $make_exit_code
|
||||
fi
|
||||
|
||||
- name: Make install
|
||||
run: |
|
||||
make CFLAGS="-Wno-missing-braces" install 2>&1 | tee -a output.log
|
||||
|
||||
- name: Validate output
|
||||
env:
|
||||
POSTGRES_VERSION: ${{ matrix.POSTGRES_VERSION }}
|
||||
PACKAGING_DOCKER_IMAGE: ${{ matrix.packaging_docker_image }}
|
||||
run: |
|
||||
echo "Postgres version: ${POSTGRES_VERSION}"
|
||||
./.github/packaging/validate_build_output.sh "rpm"
|
||||
|
||||
deb_build_tests:
|
||||
name: deb_build_tests
|
||||
needs: get_postgres_versions_from_file
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# On deb based distros, we use the same docker image for
|
||||
# builds based on different Postgres versions because deb
|
||||
# based images include all postgres installations.
|
||||
# For this reason, we have multiple runs --which is 3 today--
|
||||
# for each deb based image and we use POSTGRES_VERSION to set
|
||||
# PG_CONFIG variable in each of those runs.
|
||||
packaging_docker_image:
|
||||
- debian-bookworm-all
|
||||
- debian-bullseye-all
|
||||
- ubuntu-focal-all
|
||||
- ubuntu-jammy-all
|
||||
|
||||
POSTGRES_VERSION: ${{ fromJson(needs.get_postgres_versions_from_file.outputs.pg_versions) }}
|
||||
|
||||
container:
|
||||
image: citus/packaging:${{ matrix.packaging_docker_image }}
|
||||
options: --user root
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set pg_config path and python parameters for deb based distros
|
||||
run: |
|
||||
echo "PG_CONFIG=/usr/lib/postgresql/${{ matrix.POSTGRES_VERSION }}/bin/pg_config" >> $GITHUB_ENV
|
||||
echo "/root/.pyenv/bin:$PATH" >> $GITHUB_PATH
|
||||
echo "PACKAGING_PYTHON_VERSION=3.8.16" >> $GITHUB_ENV
|
||||
|
||||
- name: Configure
|
||||
run: |
|
||||
echo "Current Shell:$0"
|
||||
echo "GCC Version: $(gcc --version)"
|
||||
./configure 2>&1 | tee output.log
|
||||
|
||||
- name: Make clean
|
||||
run: |
|
||||
make clean
|
||||
|
||||
- name: Make
|
||||
shell: bash
|
||||
run: |
|
||||
set -e
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
make -sj$(cat /proc/cpuinfo | grep "core id" | wc -l) 2>&1 | tee -a output.log
|
||||
|
||||
# Check the exit code of the make command
|
||||
make_exit_code=${PIPESTATUS[0]}
|
||||
|
||||
# If the make command returned a non-zero exit code, exit with the same code
|
||||
if [[ $make_exit_code -ne 0 ]]; then
|
||||
echo "make command failed with exit code $make_exit_code"
|
||||
exit $make_exit_code
|
||||
fi
|
||||
|
||||
|
||||
- name: Make install
|
||||
run: |
|
||||
make install 2>&1 | tee -a output.log
|
||||
|
||||
- name: Validate output
|
||||
env:
|
||||
POSTGRES_VERSION: ${{ matrix.POSTGRES_VERSION }}
|
||||
PACKAGING_DOCKER_IMAGE: ${{ matrix.packaging_docker_image }}
|
||||
run: |
|
||||
echo "Postgres version: ${POSTGRES_VERSION}"
|
||||
./.github/packaging/validate_build_output.sh "deb"
|
|
@ -38,9 +38,6 @@ lib*.pc
|
|||
/Makefile.global
|
||||
/src/Makefile.custom
|
||||
/compile_commands.json
|
||||
/src/backend/distributed/cdc/build-cdc-*/*
|
||||
/src/test/cdc/tmp_check/*
|
||||
|
||||
|
||||
# temporary files vim creates
|
||||
*.swp
|
||||
|
@ -54,7 +51,3 @@ lib*.pc
|
|||
|
||||
# style related temporary outputs
|
||||
*.uncrustify
|
||||
.venv
|
||||
|
||||
# added output when modifying check_gucs_are_alphabetically_sorted.sh
|
||||
guc.out
|
||||
|
|
1687
CHANGELOG.md
|
@ -1,9 +0,0 @@
|
|||
# Microsoft Open Source Code of Conduct
|
||||
|
||||
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
|
||||
|
||||
Resources:
|
||||
|
||||
- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
|
||||
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
|
||||
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
|
180
CONTRIBUTING.md
|
@ -11,65 +11,8 @@ sign a Contributor License Agreement (CLA). For an explanation of
|
|||
why we ask this as well as instructions for how to proceed, see the
|
||||
[Microsoft CLA](https://cla.opensource.microsoft.com/).
|
||||
|
||||
### Devcontainer / Github Codespaces
|
||||
|
||||
The easiest way to start contributing is via our devcontainer. This container works both locally in Visual Studio Code with docker-desktop/docker-for-mac and in [GitHub Codespaces](https://github.com/features/codespaces). To open the project in VS Code you will need the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers). For Codespaces you will need to [create a new codespace](https://codespace.new/citusdata/citus).
|
||||
|
||||
With the extension installed, you can run the following from the command palette to get started:
|
||||
|
||||
```
|
||||
> Dev Containers: Clone Repository in Container Volume...
|
||||
```
|
||||
|
||||
In the subsequent popup, paste the URL of the repo and hit enter.
|
||||
|
||||
```
|
||||
https://github.com/citusdata/citus
|
||||
```
|
||||
|
||||
This will create an isolated Workspace in vscode, complete with all tools required to build, test and run the Citus extension. We keep this container up to date with the supported postgres versions as well as the exact versions of tooling we use.
|
||||
|
||||
To quickly start we suggest splitting your terminal once to have two shells. The left one in the `/workspaces/citus`, the second one changed to `/data`. The left terminal will be used to interact with the project, the right one with a testing cluster.
|
||||
|
||||
To get Citus installed from source, run `make install -s` in the first terminal. Once installed, you can start a Citus cluster in the second terminal via `citus_dev make citus`. The cluster will run in the background, and can be interacted with via `citus_dev`. To get an overview of the available commands, run `citus_dev --help`.
|
||||
|
||||
With the Citus cluster running you can connect to the coordinator in the first terminal via `psql -p9700`. Because the coordinator is the most common entrypoint the `PGPORT` environment is set accordingly, so a simple `psql` will connect directly to the coordinator.
|
||||
|
||||
### Debugging in VS Code
|
||||
|
||||
1. Start Debugging: Press F5 in VS Code to start debugging. When prompted, you'll need to attach the debugger to the appropriate PostgreSQL process.
|
||||
|
||||
2. Identify the Process: If you're running a psql command, take note of the PID that appears in your psql prompt. For example:
|
||||
```
|
||||
[local] citus@citus:9700 (PID: 5436)=#
|
||||
```
|
||||
This PID (5436 in this case) indicates the process that you should attach the debugger to.
|
||||
If you are uncertain about which process to attach, you can list all running PostgreSQL processes using the following command:
|
||||
```
|
||||
ps aux | grep postgres
|
||||
```
|
||||
|
||||
Look for the process associated with the PID you noted. For example:
|
||||
```
|
||||
citus 5436 0.0 0.0 0 0 ? S 14:00 0:00 postgres: citus citus
|
||||
```
|
||||
3. Attach the Debugger: Once you've identified the correct PID, select that process when prompted in VS Code to attach the debugger. You should now be able to debug the PostgreSQL session tied to the psql command.
|
||||
|
||||
4. Set Breakpoints and Debug: With the debugger attached, you can set breakpoints within the code. This allows you to step through the code execution, inspect variables, and fully debug the PostgreSQL instance running in your container.
|
||||
|
||||
### Getting and building
|
||||
|
||||
[PostgreSQL documentation](https://www.postgresql.org/support/versioning/) has a
|
||||
section on upgrade policy.
|
||||
|
||||
We always recommend that all users run the latest available minor release [for PostgreSQL] for whatever major version is in use.
|
||||
|
||||
We expect Citus users to honor this recommendation and use latest available
|
||||
PostgreSQL minor release. Failure to do so may result in failures in our test
|
||||
suite. There are some known improvements in PG test architecture such as
|
||||
[this commit](https://github.com/postgres/postgres/commit/3f323956128ff8589ce4d3a14e8b950837831803)
|
||||
that are missing in earlier minor versions.
|
||||
|
||||
#### Mac
|
||||
|
||||
1. Install Xcode
|
||||
|
@ -87,19 +30,9 @@ that are missing in earlier minor versions.
|
|||
|
||||
cd citus
|
||||
./configure
|
||||
# If you have already installed the project, you need to clean it first
|
||||
make clean
|
||||
make
|
||||
make install
|
||||
# Optionally, you might instead want to use `make install-all`
|
||||
# since `multi_extension` regression test would fail due to missing downgrade scripts.
|
||||
cd src/test/regress
|
||||
|
||||
pip install pipenv
|
||||
pipenv --rm
|
||||
pipenv install
|
||||
pipenv shell
|
||||
|
||||
make check
|
||||
```
|
||||
|
||||
|
@ -114,10 +47,10 @@ that are missing in earlier minor versions.
|
|||
sudo apt-key add -
|
||||
sudo apt-get update
|
||||
|
||||
sudo apt-get install -y postgresql-server-dev-14 postgresql-14 \
|
||||
sudo apt-get install -y postgresql-server-dev-13 postgresql-13 \
|
||||
autoconf flex git libcurl4-gnutls-dev libicu-dev \
|
||||
libkrb5-dev liblz4-dev libpam0g-dev libreadline-dev \
|
||||
libselinux1-dev libssl-dev libxslt1-dev libzstd-dev \
|
||||
libselinux1-dev libssl-dev libxslt-dev libzstd-dev \
|
||||
make uuid-dev
|
||||
```
|
||||
|
||||
|
@ -127,19 +60,9 @@ that are missing in earlier minor versions.
|
|||
git clone https://github.com/citusdata/citus.git
|
||||
cd citus
|
||||
./configure
|
||||
# If you have already installed the project previously, you need to clean it first
|
||||
make clean
|
||||
make
|
||||
sudo make install
|
||||
# Optionally, you might instead want to use `sudo make install-all`
|
||||
# since `multi_extension` regression test would fail due to missing downgrade scripts.
|
||||
cd src/test/regress
|
||||
|
||||
pip install pipenv
|
||||
pipenv --rm
|
||||
pipenv install
|
||||
pipenv shell
|
||||
|
||||
make check
|
||||
```
|
||||
|
||||
|
@ -171,33 +94,59 @@ that are missing in earlier minor versions.
|
|||
```bash
|
||||
sudo yum update -y
|
||||
sudo yum groupinstall -y 'Development Tools'
|
||||
sudo yum install -y postgresql14-devel postgresql14-server \
|
||||
sudo yum install -y postgresql13-devel postgresql13-server \
|
||||
git libcurl-devel libxml2-devel libxslt-devel \
|
||||
libzstd-devel llvm-toolset-7-clang llvm5.0 lz4-devel \
|
||||
openssl-devel pam-devel readline-devel
|
||||
|
||||
git clone https://github.com/citusdata/citus.git
|
||||
cd citus
|
||||
PG_CONFIG=/usr/pgsql-14/bin/pg_config ./configure
|
||||
# If you have already installed the project previously, you need to clean it first
|
||||
make clean
|
||||
PG_CONFIG=/usr/pgsql-13/bin/pg_config ./configure
|
||||
make
|
||||
sudo make install
|
||||
# Optionally, you might instead want to use `sudo make install-all`
|
||||
# since `multi_extension` regression test would fail due to missing downgrade scripts.
|
||||
cd src/test/regress
|
||||
|
||||
pip install pipenv
|
||||
pipenv --rm
|
||||
pipenv install
|
||||
pipenv shell
|
||||
|
||||
make check
|
||||
```
|
||||
|
||||
### Following our coding conventions
|
||||
|
||||
Our coding conventions are documented in [STYLEGUIDE.md](STYLEGUIDE.md).
|
||||
CircleCI will automatically reject any PRs which do not follow our coding
|
||||
conventions. The easiest way to ensure your PR adheres to those conventions is
|
||||
to use the [citus_indent](https://github.com/citusdata/tools/tree/develop/uncrustify)
|
||||
tool. This tool uses `uncrustify` under the hood.
|
||||
|
||||
```bash
|
||||
# Uncrustify changes the way it formats code every release a bit. To make sure
|
||||
# everyone formats consistently we use version 0.68.1:
|
||||
curl -L https://github.com/uncrustify/uncrustify/archive/uncrustify-0.68.1.tar.gz | tar xz
|
||||
cd uncrustify-uncrustify-0.68.1/
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make -j5
|
||||
sudo make install
|
||||
cd ../..
|
||||
|
||||
git clone https://github.com/citusdata/tools.git
|
||||
cd tools
|
||||
make uncrustify/.install
|
||||
```
|
||||
|
||||
Once you've done that, you can run the `make reindent` command from the top
|
||||
directory to recursively check and correct the style of any source files in the
|
||||
current directory. Under the hood, `make reindent` will run `citus_indent` and
|
||||
some other style corrections for you.
|
||||
|
||||
You can also run the following in the directory of this repository to
|
||||
automatically format all the files that you have changed before committing:
|
||||
|
||||
```bash
|
||||
cat > .git/hooks/pre-commit << __EOF__
|
||||
#!/bin/bash
|
||||
citus_indent --check --diff || { citus_indent --diff; exit 1; }
|
||||
__EOF__
|
||||
chmod +x .git/hooks/pre-commit
|
||||
```
|
||||
|
||||
### Making SQL changes
|
||||
|
||||
|
@ -234,50 +183,3 @@ style `#include` statements like this:
|
|||
|
||||
Any other SQL you can put directly in the main sql file, e.g.
|
||||
`src/backend/distributed/sql/citus--8.3-1--9.0-1.sql`.
|
||||
|
||||
### Backporting a commit to a release branch
|
||||
|
||||
1. Check out the release branch that you want to backport to `git checkout release-11.3`
|
||||
2. Make sure you have the latest changes `git pull`
|
||||
3. Create a new release branch with a unique name `git checkout -b release-11.3-<yourname>`
|
||||
4. Cherry-pick the commit that you want to backport `git cherry-pick -x <sha>` (the `-x` is important)
|
||||
5. Push the branch `git push`
|
||||
6. Wait for tests to pass
|
||||
7. If the cherry-pick required non-trivial merge conflicts, create a PR and ask
|
||||
for a review.
|
||||
8. After the tests pass on CI, fast-forward the release branch `git push origin release-11.3-<yourname>:release-11.3`
|
||||
|
||||
### Running tests
|
||||
|
||||
See [`src/test/regress/README.md`](https://github.com/citusdata/citus/blob/master/src/test/regress/README.md)
|
||||
|
||||
### Documentation
|
||||
|
||||
User-facing documentation is published on [docs.citusdata.com](https://docs.citusdata.com/). When adding a new feature, function, or setting, you can open a pull request or issue against the [Citus docs repo](https://github.com/citusdata/citus_docs/).
|
||||
|
||||
Detailed descriptions of the implementation for Citus developers are provided in the [Citus Technical Documentation](src/backend/distributed/README.md). It is currently a single file for ease of searching. Please update the documentation if you make any changes that affect the design or add major new features.
|
||||
|
||||
# Making a pull request ready for reviews
|
||||
|
||||
Asking for help and asking for reviews are two different things. When you're asking for help, you're asking for someone to help you with something that you're not expected to know.
|
||||
|
||||
But when you're asking for a review, you're asking for someone to review your work and provide feedback. So, when you're asking for a review, you're expected to make sure that:
|
||||
|
||||
* Your changes don't perform **unnecessary line addition / deletions / style changes on unrelated files / lines**.
|
||||
|
||||
* All CI jobs are **passing**, including **style checks** and **flaky test detection jobs**. Note that if you're an external contributor, you don't have to wait for CI jobs to run (and finish) because they don't get automatically triggered for external contributors.
|
||||
|
||||
* Your PR has the necessary amount of **tests** and they're passing.
|
||||
|
||||
* You separated as much work as possible into **separate PRs**, e.g., a prerequisite bugfix, a refactoring, etc.
|
||||
|
||||
* Your PR doesn't introduce a typo or something that you can easily fix yourself.
|
||||
|
||||
* After all CI jobs pass, the code-coverage measurement job (CodeCov as of today) kicks in. That's why it's important to get the **tests passing** first. At that point, you're expected to check the **CodeCov annotations** that can be seen in the **Files Changed** tab and to make sure that it doesn't complain about any lines that are not covered. For example, it's ok if CodeCov complains about an `ereport()` call that you put for an "unexpected-but-better-than-crashing" case, but it's not ok if it complains about an uncovered `if` branch that you added.
|
||||
|
||||
* And finally, perform a **self-review** to make sure that:
|
||||
* Code and code-comments reflect the idea **without requiring an extra explanation** via a chat message / email / PR comment.
|
||||
This is important because we don't expect developers to reach out to the author / read the whole discussion in the PR to understand the idea behind a commit merged into the `main` branch.
|
||||
* PR description is clear enough.
|
||||
* If-and-only-if you're **introducing a user facing change / bugfix**, your PR has a line that starts with `DESCRIPTION: <Present simple tense word that starts with a capital letter, e.g., Adds support for / Fixes / Disallows>`.
|
||||
* **Commit messages** are clear enough if the commits are doing logically different things.
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
# Devcontainer
|
||||
|
||||
## Coredumps
|
||||
When postgres/citus crashes, there is the option to create a coredump. This is useful for debugging the issue. Coredumps are enabled in the devcontainer by default. However, not all environments are configured correctly out of the box. The most important configuration that is not standardized is the `core_pattern`. The configuration can be verified from the container, however, you cannot change this setting from inside the container as the filesystem containing this setting is in read only mode while inside the container.
|
||||
|
||||
To verify if corefiles are written run the following command in a terminal. This shows the filename pattern with which the corefile will be written.
|
||||
```bash
|
||||
cat /proc/sys/kernel/core_pattern
|
||||
```
|
||||
|
||||
This should be configured with a relative path or just a plain filename, such as `core`. When your environment shows an absolute path you will need to change this setting. How to change this setting depends highly on the underlying system as the setting needs to be changed on the kernel of the host running the container.
|
||||
|
||||
You can put any pattern in `/proc/sys/kernel/core_pattern` as you see fit. For example, you can add the PID to the core pattern in one of two ways:
|
||||
- You either include `%p` in the core_pattern. This gets substituted with the PID of the crashing process.
|
||||
- Alternatively you could set `/proc/sys/kernel/core_uses_pid` to `1` in the same way as you set `core_pattern`. This will append the PID to the corefile if `%p` is not explicitly contained in the core_pattern.
|
||||
|
||||
When a coredump is written you can use the debug/launch configuration `Open core file` which is preconfigured in the devcontainer. This will open a file prompt that lists all coredumps that are found in your workspace. When you want to debug coredumps from `citus_dev` that are run in your `/data` directory, you can add the data directory to your workspace. In the command palette of VS Code you can run `>Workspace: Add Folder to Workspace...` and select the `/data` directory. This will allow you to open the coredumps from the `/data` directory in the `Open core file` debug configuration.
|
||||
|
||||
### Windows (docker desktop)
|
||||
When running in docker desktop on windows you will most likely need to change this setting. The linux guest in WSL2 that runs your container is the `docker-desktop` environment. The easiest way to get onto the host, where you can change this setting, is to open a powershell window and verify you have the docker-desktop environment listed.
|
||||
|
||||
```powershell
|
||||
wsl --list
|
||||
```
|
||||
|
||||
Among others this should list both `docker-desktop` and `docker-desktop-data`. You can then open a shell in the `docker-desktop` environment.
|
||||
|
||||
```powershell
|
||||
wsl -d docker-desktop
|
||||
```
|
||||
|
||||
Inside this shell you can verify that you have the right environment by running
|
||||
|
||||
```bash
|
||||
cat /proc/sys/kernel/core_pattern
|
||||
```
|
||||
|
||||
This should show the same configuration as the one you see inside the devcontainer. You can then change the setting by running the following command.
|
||||
This will change the setting for the current session. If you want to make the change permanent you will need to add this to a startup script.
|
||||
|
||||
```bash
|
||||
echo "core" > /proc/sys/kernel/core_pattern
|
||||
```
|
24
Makefile
|
@ -13,16 +13,10 @@ include Makefile.global
|
|||
|
||||
all: extension
|
||||
|
||||
|
||||
# build columnar only
|
||||
columnar:
|
||||
$(MAKE) -C src/backend/columnar all
|
||||
# build extension
|
||||
extension: $(citus_top_builddir)/src/include/citus_version.h columnar
|
||||
extension: $(citus_top_builddir)/src/include/citus_version.h
|
||||
$(MAKE) -C src/backend/distributed/ all
|
||||
install-columnar: columnar
|
||||
$(MAKE) -C src/backend/columnar install
|
||||
install-extension: extension install-columnar
|
||||
install-extension: extension
|
||||
$(MAKE) -C src/backend/distributed/ install
|
||||
install-headers: extension
|
||||
$(MKDIR_P) '$(DESTDIR)$(includedir_server)/distributed/'
|
||||
|
@ -33,35 +27,27 @@ install-headers: extension
|
|||
|
||||
clean-extension:
|
||||
$(MAKE) -C src/backend/distributed/ clean
|
||||
$(MAKE) -C src/backend/columnar/ clean
|
||||
clean-full:
|
||||
$(MAKE) -C src/backend/distributed/ clean-full
|
||||
.PHONY: extension install-extension clean-extension clean-full
|
||||
|
||||
# Add to generic targets
|
||||
install: install-extension install-headers
|
||||
install-downgrades:
|
||||
$(MAKE) -C src/backend/distributed/ install-downgrades
|
||||
install-all: install-headers
|
||||
$(MAKE) -C src/backend/columnar/ install-all
|
||||
$(MAKE) -C src/backend/distributed/ install-all
|
||||
|
||||
|
||||
# Add to generic targets
|
||||
install: install-extension install-headers
|
||||
clean: clean-extension
|
||||
|
||||
# apply or check style
|
||||
reindent:
|
||||
${citus_abs_top_srcdir}/ci/fix_style.sh
|
||||
check-style:
|
||||
black . --check --quiet
|
||||
isort . --check --quiet
|
||||
flake8
|
||||
cd ${citus_abs_top_srcdir} && citus_indent --quiet --check
|
||||
.PHONY: reindent check-style
|
||||
|
||||
# depend on install-all so that downgrade scripts are installed as well
|
||||
check: all install-all
|
||||
# explicitly does not use $(MAKE) to avoid parallelism
|
||||
make -C src/test/regress check
|
||||
$(MAKE) -C src/test/regress check-full
|
||||
|
||||
.PHONY: all check clean install install-downgrades install-all
|
||||
|
|
|
@ -64,8 +64,8 @@ $(citus_top_builddir)/Makefile.global: $(citus_abs_top_srcdir)/configure $(citus
|
|||
$(citus_top_builddir)/config.status: $(citus_abs_top_srcdir)/configure $(citus_abs_top_srcdir)/src/backend/distributed/citus.control
|
||||
cd @abs_top_builddir@ && ./config.status --recheck && ./config.status
|
||||
|
||||
# Regenerate configure if configure.ac changed
|
||||
$(citus_abs_top_srcdir)/configure: $(citus_abs_top_srcdir)/configure.ac
|
||||
# Regenerate configure if configure.in changed
|
||||
$(citus_abs_top_srcdir)/configure: $(citus_abs_top_srcdir)/configure.in
|
||||
cd ${citus_abs_top_srcdir} && ./autogen.sh
|
||||
|
||||
# If specified via configure, replace the default compiler. Normally
|
||||
|
|
185
README.md
|
@ -1,19 +1,8 @@
|
|||
| **<br/>The Citus database is 100% open source.<br/><img width=1000/><br/>Learn what's new in the [Citus 13.0 release blog](https://www.citusdata.com/blog/2025/02/06/distribute-postgresql-17-with-citus-13/) and the [Citus Updates page](https://www.citusdata.com/updates/).<br/><br/>**|
|
||||
|---|
|
||||
<br/>
|
||||
|
||||
|
||||
|
||||

|
||||

|
||||
|
||||
[](https://slack.citusdata.com)
|
||||
[](https://docs.citusdata.com/)
|
||||
[](https://stackoverflow.com/questions/tagged/citus)
|
||||
[](https://slack.citusdata.com/)
|
||||
[](https://app.codecov.io/gh/citusdata/citus)
|
||||
[](https://twitter.com/intent/follow?screen_name=citusdata)
|
||||
|
||||
[](https://packagecloud.io/app/citusdata/community/search?q=&filter=debs)
|
||||
[](https://packagecloud.io/app/citusdata/community/search?q=&filter=rpms)
|
||||
|
||||
## What is Citus?
|
||||
|
||||
|
@ -25,21 +14,16 @@ With Citus, you extend your PostgreSQL database with new superpowers:
|
|||
- **Reference tables** are replicated to all nodes for joins and foreign keys from distributed tables and maximum read performance.
|
||||
- **Distributed query engine** routes and parallelizes SELECT, DML, and other operations on distributed tables across the cluster.
|
||||
- **Columnar storage** compresses data, speeds up scans, and supports fast projections, both on regular and distributed tables.
|
||||
- **Query from any node** enables you to utilize the full capacity of your cluster for distributed queries.
|
||||
|
||||
You can use these Citus superpowers to make your Postgres database scale-out ready on a single Citus node. Or you can build a large cluster capable of handling **high transaction throughputs**, especially in **multi-tenant apps**, run **fast analytical queries**, and process large amounts of **time series** or **IoT data** for **real-time analytics**. When your data size and volume grow, you can easily add more worker nodes to the cluster and rebalance the shards.
|
||||
|
||||
Our [SIGMOD '21](https://2021.sigmod.org/) paper [Citus: Distributed PostgreSQL for Data-Intensive Applications](https://doi.org/10.1145/3448016.3457551) gives a more detailed look into what Citus is, how it works, and why it works that way.
|
||||
|
||||

|
||||

|
||||
|
||||
Since Citus is an extension to Postgres, you can use Citus with the latest Postgres versions. And Citus works seamlessly with the PostgreSQL tools and extensions you are already familiar with.
|
||||
|
||||
- [Why Citus?](#why-citus)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Using Citus](#using-citus)
|
||||
- [Schema-based sharding](#schema-based-sharding)
|
||||
- [Setting up with High Availability](#setting-up-with-high-availability)
|
||||
- [Documentation](#documentation)
|
||||
- [Architecture](#architecture)
|
||||
- [When to Use Citus](#when-to-use-citus)
|
||||
|
@ -53,23 +37,23 @@ Developers choose Citus for two reasons:
|
|||
|
||||
1. Your application is outgrowing a single PostgreSQL node
|
||||
|
||||
If the size and volume of your data increases over time, you may start seeing any number of performance and scalability problems on a single PostgreSQL node. For example: High CPU utilization and I/O wait times slow down your queries, SQL queries return out of memory errors, autovacuum cannot keep up and increases table bloat, etc.
|
||||
If the size and volume of your data increases over time, you may start seeing any number of performance and scalability problems on a single PostgreSQL node. For example: High CPU utilization and I/O wait times slow down your queries, SQL queries return out of memory errors, autovacuum cannot keep up and increases table bloat, etc.
|
||||
|
||||
With Citus you can distribute and optionally compress your tables to always have enough memory, CPU, and I/O capacity to achieve high performance at scale. The distributed query engine can efficiently route transactions across the cluster, while parallelizing analytical queries and batch operations across all cores. Moreover, you can still use the PostgreSQL features and tools you know and love.
|
||||
With Citus you can distribute and optionally compress your tables to always have enough memory, CPU, and I/O capacity to achieve high performance at scale. The distributed query engine can efficiently route transactions across the cluster, while parallelizing analytical queries and batch operations across all cores. Moreover, you can still use the PostgreSQL features and tools you know and love.
|
||||
|
||||
2. PostgreSQL can do things other systems can’t
|
||||
|
||||
There are many data processing systems that are built to scale out, but few have as many powerful capabilities as PostgreSQL, including: Advanced joins and subqueries, user-defined functions, update/delete/upsert, constraints and foreign keys, powerful extensions (e.g. PostGIS, HyperLogLog), many types of indexes, time-partitioning, and sophisticated JSON support.
|
||||
There are many data processing systems that are built to scale out, but few have as many powerful capabilities as PostgreSQL, including: Advanced joins and subqueries, user-defined functions, update/delete/upsert, constraints and foreign keys, powerful extensions (e.g. PostGIS, HyperLogLog), many types of indexes, time-partitioning, and sophisticated JSON support.
|
||||
|
||||
Citus makes PostgreSQL’s most powerful capabilities work at any scale, allowing you to handle complex data-intensive workloads on a single database system.
|
||||
Citus makes PostgreSQL’s most powerful capabilities work at any scale, allowing you to handle complex data-intensive workloads on a single database system.
|
||||
|
||||
## Getting Started
|
||||
|
||||
The quickest way to get started with Citus is to use the [Azure Cosmos DB for PostgreSQL](https://learn.microsoft.com/azure/cosmos-db/postgresql/quickstart-create-portal) managed service in the cloud—or [set up Citus locally](https://docs.citusdata.com/en/stable/installation/single_node.html).
|
||||
The quickest way to get started with Citus is to use the [Hyperscale (Citus)](https://docs.microsoft.com/azure/postgresql/quickstart-create-hyperscale-portal) deployment option in the Azure Database for PostgreSQL managed service—or [set up Citus locally](https://docs.citusdata.com/en/stable/installation/single_node.html).
|
||||
|
||||
### Citus Managed Service on Azure
|
||||
### Hyperscale (Citus) on Azure Database for PostgreSQL
|
||||
|
||||
You can get a fully-managed Citus cluster in minutes through the [Azure Cosmos DB for PostgreSQL portal](https://azure.microsoft.com/products/cosmos-db/). Azure will manage your backups, high availability through auto-failover, software updates, monitoring, and more for all of your servers. To get started with Citus on Azure, use the [Azure Cosmos DB for PostgreSQL Quickstart](https://learn.microsoft.com/azure/cosmos-db/postgresql/quickstart-create-portal).
|
||||
You can get a fully-managed Citus cluster in minutes through the Hyperscale (Citus) deployment option in the [Azure Database for PostgreSQL](https://azure.microsoft.com/services/postgresql/) portal. Azure will manage your backups, high availability through auto-failover, software updates, monitoring, and more for all of your servers. To get started with Hyperscale (Citus), use the [Hyperscale (Citus) Quickstart](https://docs.microsoft.com/azure/postgresql/quickstart-create-hyperscale-portal) in the Azure docs.
|
||||
|
||||
### Running Citus using Docker
|
||||
|
||||
|
@ -95,14 +79,14 @@ Install packages on Ubuntu / Debian:
|
|||
```bash
|
||||
curl https://install.citusdata.com/community/deb.sh > add-citus-repo.sh
|
||||
sudo bash add-citus-repo.sh
|
||||
sudo apt-get -y install postgresql-17-citus-13.0
|
||||
sudo apt-get -y install postgresql-13-citus-10.0
|
||||
```
|
||||
|
||||
Install packages on Red Hat:
|
||||
Install packages on CentOS / Fedora / Red Hat:
|
||||
```bash
|
||||
curl https://install.citusdata.com/community/rpm.sh > add-citus-repo.sh
|
||||
sudo bash add-citus-repo.sh
|
||||
sudo yum install -y citus130_17
|
||||
sudo yum install -y citus100_13
|
||||
```
|
||||
|
||||
To add Citus to your local PostgreSQL database, add the following to `postgresql.conf`:
|
||||
|
@ -124,7 +108,7 @@ If you want to set up a multi-node cluster, you can also set up additional Postg
|
|||
|
||||
```sql
|
||||
-- before adding the first worker node, tell future worker nodes how to reach the coordinator
|
||||
SELECT citus_set_coordinator_host('10.0.0.1', 5432);
|
||||
-- SELECT citus_set_coordinator_host('10.0.0.1', 5432);
|
||||
|
||||
-- add worker nodes
|
||||
SELECT citus_add_node('10.0.0.2', 5432);
|
||||
|
@ -234,42 +218,7 @@ WHERE device_type_id = 55;
|
|||
Time: 209.961 ms
|
||||
```
|
||||
|
||||
Co-location also helps you scale [INSERT..SELECT](https://docs.citusdata.com/en/stable/articles/aggregation.html), [stored procedures](https://www.citusdata.com/blog/2020/11/21/making-postgres-stored-procedures-9x-faster-in-citus/), and [distributed transactions](https://www.citusdata.com/blog/2017/06/02/scaling-complex-sql-transactions/).
|
||||
|
||||
### Distributing Tables without interrupting the application
|
||||
|
||||
|
||||
Some of you start with Postgres and decide to distribute tables later on, while your application is still using the tables. In that case, you want to avoid downtime for both reads and writes. The `create_distributed_table` command blocks writes (e.g., DML commands) on the table until the command is finished. With the `create_distributed_table_concurrently` command, by contrast, your application can continue to read and write the data while the command is running.
|
||||
|
||||
|
||||
```sql
|
||||
CREATE TABLE device_logs (
|
||||
device_id bigint primary key,
|
||||
log text
|
||||
);
|
||||
|
||||
-- insert device logs
|
||||
INSERT INTO device_logs (device_id, log)
|
||||
SELECT s, 'device log:'||s FROM generate_series(0, 99) s;
|
||||
|
||||
-- convert device_logs into a distributed table without interrupting the application
|
||||
SELECT create_distributed_table_concurrently('device_logs', 'device_id', colocate_with := 'devices');
|
||||
|
||||
|
||||
-- get the count of the logs, parallelized across shards
|
||||
SELECT count(*) FROM device_logs;
|
||||
|
||||
┌───────┐
|
||||
│ count │
|
||||
├───────┤
|
||||
│ 100 │
|
||||
└───────┘
|
||||
(1 row)
|
||||
|
||||
Time: 48.734 ms
|
||||
```
|
||||
|
||||
|
||||
Co-location also helps you scale [INSERT..SELECT]( https://docs.citusdata.com/en/stable/articles/aggregation.html), [stored procedures]( https://www.citusdata.com/blog/2020/11/21/making-postgres-stored-procedures-9x-faster-in-citus/), and [distributed transactions](https://www.citusdata.com/blog/2017/06/02/scaling-complex-sql-transactions/).
|
||||
|
||||
### Creating Reference Tables
|
||||
|
||||
|
@ -344,78 +293,15 @@ CREATE TABLE events_row AS SELECT * FROM events_columnar;
|
|||
|
||||
You can use columnar storage by itself, or in a distributed table to combine the benefits of compression and the distributed query engine.
|
||||
|
||||
When using columnar storage, you should only load data in batch using `COPY` or `INSERT..SELECT` to achieve good compression. Update, delete, and foreign keys are currently unsupported on columnar tables. However, you can use partitioned tables in which newer partitions use row-based storage, and older partitions are compressed using columnar storage.
|
||||
When using columnar storage, you should only load data in batch using `COPY` or `INSERT..SELECT` to achieve good compression. Update, delete, indexes, and foreign keys are currently unsupported on columnar tables. However, you can use partitioned tables in which newer partitions use row-based storage, and older partitions are compressed using columnar storage.
|
||||
|
||||
To learn more about columnar storage, check out the [columnar storage README](https://github.com/citusdata/citus/blob/master/src/backend/columnar/README.md).
|
||||
|
||||
## Schema-based sharding
|
||||
|
||||
Available since Citus 12.0, [schema-based sharding](https://docs.citusdata.com/en/stable/get_started/concepts.html#schema-based-sharding) is the shared-database, separate-schema model: the schema becomes the logical shard within the database. Multi-tenant apps can use a schema per tenant to easily shard along the tenant dimension. Query changes are not required and the application usually only needs a small modification to set the proper search_path when switching tenants. Schema-based sharding is an ideal solution for microservices, and for ISVs deploying applications that cannot undergo the changes required to onboard row-based sharding.
|
||||
|
||||
### Creating distributed schemas
|
||||
|
||||
You can turn an existing schema into a distributed schema by calling `citus_schema_distribute`:
|
||||
|
||||
```sql
|
||||
SELECT citus_schema_distribute('user_service');
|
||||
```
|
||||
|
||||
Alternatively, you can set `citus.enable_schema_based_sharding` to have all newly created schemas be automatically converted into distributed schemas:
|
||||
|
||||
```sql
|
||||
SET citus.enable_schema_based_sharding TO ON;
|
||||
|
||||
CREATE SCHEMA AUTHORIZATION user_service;
|
||||
CREATE SCHEMA AUTHORIZATION time_service;
|
||||
CREATE SCHEMA AUTHORIZATION ping_service;
|
||||
```
|
||||
|
||||
### Running queries
|
||||
|
||||
Queries will be properly routed to schemas based on `search_path` or by explicitly using the schema name in the query.
|
||||
|
||||
For [microservices](https://docs.citusdata.com/en/stable/get_started/tutorial_microservices.html) you would create a USER per service matching the schema name, hence the default `search_path` would contain the schema name. When connected the user queries would be automatically routed and no changes to the microservice would be required.
|
||||
|
||||
```sql
|
||||
CREATE USER user_service;
|
||||
CREATE SCHEMA AUTHORIZATION user_service;
|
||||
```
|
||||
|
||||
For typical multi-tenant applications, you would set the search path to the tenant schema name in your application:
|
||||
|
||||
```sql
|
||||
SET search_path = tenant_name, public;
|
||||
```
|
||||
|
||||
## Setting up with High Availability
|
||||
|
||||
One of the most popular high availability solutions for PostgreSQL, [Patroni 3.0](https://github.com/zalando/patroni), has [first class support for Citus 10.0 and above](https://patroni.readthedocs.io/en/latest/citus.html#citus). Additionally, Citus 11.2 and above ship with improvements for smoother node switchover in Patroni.
|
||||
|
||||
An example of patronictl list output for the Citus cluster:
|
||||
|
||||
```bash
|
||||
postgres@coord1:~$ patronictl list demo
|
||||
```
|
||||
|
||||
```text
|
||||
+ Citus cluster: demo ----------+--------------+---------+----+-----------+
|
||||
| Group | Member | Host | Role | State | TL | Lag in MB |
|
||||
+-------+---------+-------------+--------------+---------+----+-----------+
|
||||
| 0 | coord1 | 172.27.0.10 | Replica | running | 1 | 0 |
|
||||
| 0 | coord2 | 172.27.0.6 | Sync Standby | running | 1 | 0 |
|
||||
| 0 | coord3 | 172.27.0.4 | Leader | running | 1 | |
|
||||
| 1 | work1-1 | 172.27.0.8 | Sync Standby | running | 1 | 0 |
|
||||
| 1 | work1-2 | 172.27.0.2 | Leader | running | 1 | |
|
||||
| 2 | work2-1 | 172.27.0.5 | Sync Standby | running | 1 | 0 |
|
||||
| 2 | work2-2 | 172.27.0.7 | Leader | running | 1 | |
|
||||
+-------+---------+-------------+--------------+---------+----+-----------+
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
If you’re ready to get started with Citus or want to know more, we recommend reading the [Citus open source documentation](https://docs.citusdata.com/en/stable/). Or, if you are using Citus on Azure, then the [Azure Cosmos DB for PostgreSQL](https://learn.microsoft.com/azure/cosmos-db/postgresql/introduction) is the place to start.
|
||||
If you’re ready to get started with Citus or want to know more, we recommend reading the [Citus open source documentation](https://docs.citusdata.com/en/stable/). Or, if you are using Citus on Azure, then the [Hyperscale (Citus) documentation](https://docs.microsoft.com/azure/postgresql/hyperscale/) is online and available as part of the Azure Database for PostgreSQL docs.
|
||||
|
||||
Our Citus docs contain comprehensive use case guides on how to build a [multi-tenant SaaS application](https://docs.citusdata.com/en/stable/use_cases/multi_tenant.html), [real-time analytics dashboard]( https://docs.citusdata.com/en/stable/use_cases/realtime_analytics.html), or work with [time series data](https://docs.citusdata.com/en/stable/use_cases/timeseries.html).
|
||||
Our Citus docs contain comprehensive use case guides on how to build a [multi-tenant SaaS application]( https://docs.citusdata.com/en/stable/use_cases/multi_tenant.html), [real-time analytics dashboard]( https://docs.citusdata.com/en/stable/use_cases/realtime_analytics.html), or work with [time series data]( https://docs.citusdata.com/en/stable/use_cases/timeseries.html).
|
||||
|
||||
## Architecture
|
||||
|
||||
|
@ -423,13 +309,10 @@ A Citus database cluster grows from a single PostgreSQL node into a cluster by a
|
|||
|
||||
Data in distributed tables is stored in “shards”, which are actually just regular PostgreSQL tables on the worker nodes. When querying a distributed table on the coordinator node, Citus will send regular SQL queries to the worker nodes. That way, all the usual PostgreSQL optimizations and extensions can automatically be used with Citus.
|
||||
|
||||

|
||||

|
||||
|
||||
When you send a query in which all (co-located) distributed tables have the same filter on the distribution column, Citus will automatically detect that and send the whole query to the worker node that stores the data. That way, arbitrarily complex queries are supported with minimal routing overhead, which is especially useful for scaling transactional workloads. If queries do not have a specific filter, each shard is queried in parallel, which is especially useful in analytical workloads. The Citus distributed executor is adaptive and is designed to handle both query types at the same time on the same system under high concurrency, which enables large-scale mixed workloads.
|
||||
|
||||
The schema and metadata of distributed tables and reference tables are automatically synchronized to all the nodes in the cluster. That way, you can connect to any node to run distributed queries. Schema changes and cluster administration still need to go through the coordinator.
|
||||
|
||||
Detailed descriptions of the implementation for Citus developers are provided in the [Citus Technical Documentation](src/backend/distributed/README.md).
|
||||
|
||||
## When to use Citus
|
||||
|
||||
|
@ -440,56 +323,48 @@ Citus is uniquely capable of scaling both analytical and transactional workloads
|
|||
|
||||
The advanced parallel, distributed query engine in Citus combined with PostgreSQL features such as [array types](https://www.postgresql.org/docs/current/arrays.html), [JSONB](https://www.postgresql.org/docs/current/datatype-json.html), [lateral joins](https://heap.io/blog/engineering/postgresqls-powerful-new-join-type-lateral), and extensions like [HyperLogLog](https://github.com/citusdata/postgresql-hll) and [TopN](https://github.com/citusdata/postgresql-topn) allow you to build responsive analytics dashboards no matter how many customers or how much data you have.
|
||||
|
||||
Example real-time analytics users: [Algolia](https://www.citusdata.com/customers/algolia)
|
||||
Example real-time analytics users: [Algolia](https://www.citusdata.com/customers/algolia), [Heap](https://www.citusdata.com/customers/heap)
|
||||
|
||||
- **[Time series data](http://docs.citusdata.com/en/stable/use_cases/timeseries.html)**:
|
||||
Citus enables you to process and analyze very large amounts of time series data. The biggest Citus clusters store well over a petabyte of time series data and ingest terabytes per day.
|
||||
|
||||
Citus integrates seamlessly with [Postgres table partitioning](https://www.postgresql.org/docs/current/ddl-partitioning.html) and has [built-in functions for partitioning by time](https://www.citusdata.com/blog/2021/10/22/how-to-scale-postgres-for-time-series-data-with-citus/), which can speed up queries and writes on time series tables. You can take advantage of Citus’s parallel, distributed query engine for fast analytical queries, and use the built-in *columnar storage* to compress old partitions.
|
||||
Citus integrates seamlessly with [Postgres table partitioning](https://www.postgresql.org/docs/current/ddl-partitioning.html) and [pg_partman](https://www.citusdata.com/blog/2018/01/24/citus-and-pg-partman-creating-a-scalable-time-series-database-on-PostgreSQL/), which can speed up queries and writes on time series tables. You can take advantage of Citus’s parallel, distributed query engine for fast analytical queries, and use the built-in *columnar storage* to compress old partitions.
|
||||
|
||||
Example users: [MixRank](https://www.citusdata.com/customers/mixrank)
|
||||
Example users: [MixRank](https://www.citusdata.com/customers/mixrank), [Windows team](https://techcommunity.microsoft.com/t5/azure-database-for-postgresql/architecting-petabyte-scale-analytics-by-scaling-out-postgres-on/ba-p/969685)
|
||||
|
||||
- **[Software-as-a-service (SaaS) applications](http://docs.citusdata.com/en/stable/use_cases/multi_tenant.html)**:
|
||||
SaaS and other multi-tenant applications need to be able to scale their database as the number of tenants/customers grows. Citus enables you to transparently shard a complex data model by the tenant dimension, so your database can grow along with your business.
|
||||
|
||||
By distributing tables along a tenant ID column and co-locating data for the same tenant, Citus can horizontally scale complex (tenant-scoped) queries, transactions, and foreign key graphs. Reference tables and distributed DDL commands make database management a breeze compared to manual sharding. On top of that, you have a built-in distributed query engine for doing cross-tenant analytics inside the database.
|
||||
|
||||
Example multi-tenant SaaS users: [Salesloft](https://fivetran.com/case-studies/replicating-sharded-databases-a-case-study-of-salesloft-citus-data-and-fivetran), [ConvertFlow](https://www.citusdata.com/customers/convertflow)
|
||||
|
||||
- **[Microservices](https://docs.citusdata.com/en/stable/get_started/tutorial_microservices.html)**: Citus supports schema-based sharding, which allows distributing regular database schemas across many machines. This sharding methodology fits nicely with a typical microservices architecture, where storage is fully owned by the service and hence can’t share the same schema definition with other tenants. Citus allows distributing horizontally scalable state across services, solving one of the [main problems](https://stackoverflow.blog/2020/11/23/the-macro-problem-with-microservices/) of microservices.
|
||||
Example multi-tenant SaaS users: [Copper](https://www.citusdata.com/customers/copper), [Salesloft](https://fivetran.com/case-studies/replicating-sharded-databases-a-case-study-of-salesloft-citus-data-and-fivetran), [ConvertFlow](https://www.citusdata.com/customers/convertflow)
|
||||
|
||||
- **Geospatial**:
|
||||
Because of the powerful [PostGIS](https://postgis.net/) extension to Postgres that adds support for geographic objects into Postgres, many people run spatial/GIS applications on top of Postgres. And since spatial location information has become part of our daily life, well, there are more geospatial applications than ever. When your Postgres database needs to scale out to handle an increased workload, Citus is a good fit.
|
||||
|
||||
Example geospatial users: [Helsinki Regional Transportation Authority (HSL)](https://customers.microsoft.com/story/845146-transit-authority-improves-traffic-monitoring-with-azure-database-for-postgresql-hyperscale), [MobilityDB](https://www.citusdata.com/blog/2020/11/09/analyzing-gps-trajectories-at-scale-with-postgres-mobilitydb/).
|
||||
Example geospatial users: [Helsinki Regional Transportation Authority (HSL)](https://customers.microsoft.com/en-us/story/845146-transit-authority-improves-traffic-monitoring-with-azure-database-for-postgresql-hyperscale), [MobilityDB]( https://www.citusdata.com/blog/2020/11/09/analyzing-gps-trajectories-at-scale-with-postgres-mobilitydb/).
|
||||
|
||||
## Need Help?
|
||||
|
||||
- **Slack**: Ask questions in our Citus community [Slack channel](https://slack.citusdata.com).
|
||||
- **GitHub issues**: Please submit issues via [GitHub issues](https://github.com/citusdata/citus/issues).
|
||||
- **Documentation**: Our [Citus docs](https://docs.citusdata.com) have a wealth of resources, including sections on [query performance tuning](https://docs.citusdata.com/en/stable/performance/performance_tuning.html), [useful diagnostic queries](https://docs.citusdata.com/en/stable/admin_guide/diagnostic_queries.html), and [common error messages](https://docs.citusdata.com/en/stable/reference/common_errors.html).
|
||||
- **Docs issues**: You can also submit documentation issues via [GitHub issues for our Citus docs](https://github.com/citusdata/citus_docs/issues).
|
||||
- **Updates & Release Notes**: Learn about what's new in each Citus version on the [Citus Updates page](https://www.citusdata.com/updates/).
|
||||
- **Docs issues**: You can also submit documentation issues via [GitHub
|
||||
issues for our Citus docs](https://github.com/citusdata/citus_docs/issues).
|
||||
|
||||
## Contributing
|
||||
|
||||
Citus is built on and of open source, and we welcome your contributions. The [CONTRIBUTING.md](CONTRIBUTING.md) file explains how to get started developing the Citus extension itself and our code quality guidelines.
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
|
||||
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
|
||||
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
|
||||
|
||||
## Stay Connected
|
||||
|
||||
- **Twitter**: Follow us [@citusdata](https://twitter.com/citusdata) to track the latest posts & updates on what’s happening.
|
||||
- **Citus Blog**: Read our popular [Citus Open Source Blog](https://www.citusdata.com/blog/) for posts about PostgreSQL and Citus.
|
||||
- **Citus Blog**: Read our popular [Citus Blog](https://www.citusdata.com/blog/) for useful & informative posts about PostgreSQL and Citus.
|
||||
- **Citus Newsletter**: Subscribe to our monthly technical [Citus Newsletter](https://www.citusdata.com/join-newsletter) to get a curated collection of our favorite posts, videos, docs, talks, & other Postgres goodies.
|
||||
- **Slack**: Our [Citus Public slack](https://slack.citusdata.com/) is a good way to stay connected, not just with us but with other Citus users.
|
||||
- **Sister Blog**: Read the PostgreSQL posts on the [Azure Cosmos DB for PostgreSQL blog](https://devblogs.microsoft.com/cosmosdb/category/postgresql/) about our managed service on Azure.
|
||||
- **Slack**: Our [Citus Public slack]( https://slack.citusdata.com/) is a good way to stay connected, not just with us but with other Citus users.
|
||||
- **Sister Blog**: Read our Azure Database for PostgreSQL [sister blog on Microsoft TechCommunity](https://techcommunity.microsoft.com/t5/azure-database-for-postgresql/bg-p/ADforPostgreSQL) for posts relating to Postgres (and Citus) on Azure.
|
||||
- **Videos**: Check out this [YouTube playlist](https://www.youtube.com/playlist?list=PLixnExCn6lRq261O0iwo4ClYxHpM9qfVy) of some of our favorite Citus videos and demos. If you want to deep dive into how Citus extends PostgreSQL, you might want to check out Marco Slot’s talk at Carnegie Mellon titled [Citus: Distributed PostgreSQL as an Extension](https://youtu.be/X-aAgXJZRqM) that was part of Andy Pavlo’s Vaccination Database Talks series at CMUDB.
|
||||
- **Our other Postgres projects**: Our team also works on other awesome PostgreSQL open source extensions & projects, including: [pg_cron](https://github.com/citusdata/pg_cron), [HyperLogLog](https://github.com/citusdata/postgresql-hll), [TopN](https://github.com/citusdata/postgresql-topn), [pg_auto_failover](https://github.com/citusdata/pg_auto_failover), [activerecord-multi-tenant](https://github.com/citusdata/activerecord-multi-tenant), and [django-multitenant](https://github.com/citusdata/django-multitenant).
|
||||
- **Our other Postgres projects**: Our team also works on other awesome PostgreSQL open source extensions & projects, including: [pg_cron]( https://github.com/citusdata/pg_cron), [HyperLogLog](https://github.com/citusdata/postgresql-hll), [TopN](https://github.com/citusdata/postgresql-topn), [pg_auto_failover](https://github.com/citusdata/pg_auto_failover), [activerecord-multi-tenant](https://github.com/citusdata/activerecord-multi-tenant), and [django-multitenant](https://github.com/citusdata/django-multitenant).
|
||||
|
||||
___
|
||||
|
||||
|
|
41
SECURITY.md
|
@ -1,41 +0,0 @@
|
|||
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.8 BLOCK -->
|
||||
|
||||
## Security
|
||||
|
||||
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
|
||||
|
||||
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
|
||||
|
||||
## Reporting Security Issues
|
||||
|
||||
**Please do not report security vulnerabilities through public GitHub issues.**
|
||||
|
||||
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
|
||||
|
||||
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
|
||||
|
||||
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
|
||||
|
||||
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
|
||||
|
||||
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
|
||||
* Full paths of source file(s) related to the manifestation of the issue
|
||||
* The location of the affected source code (tag/branch/commit or direct URL)
|
||||
* Any special configuration required to reproduce the issue
|
||||
* Step-by-step instructions to reproduce the issue
|
||||
* Proof-of-concept or exploit code (if possible)
|
||||
* Impact of the issue, including how an attacker might exploit the issue
|
||||
|
||||
This information will help us triage your report more quickly.
|
||||
|
||||
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
|
||||
|
||||
## Preferred Languages
|
||||
|
||||
We prefer all communications to be in English.
|
||||
|
||||
## Policy
|
||||
|
||||
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
|
||||
|
||||
<!-- END MICROSOFT SECURITY.MD BLOCK -->
|
160
STYLEGUIDE.md
|
@ -1,160 +0,0 @@
|
|||
# Coding style
|
||||
|
||||
The existing code-style in our code-base is not super consistent. There are multiple reasons for that. One big reason is that our code-base is relatively old and our standards have changed over time. The second big reason is that our style-guide is different from the style-guide of Postgres, and some code is copied from the Postgres source code and only slightly modified. The below rules are for new code. If you're changing existing code that uses a different style, use your best judgement to decide whether to use the rules here or to match the existing style.
|
||||
|
||||
## Using citus_indent
|
||||
|
||||
CI pipeline will automatically reject any PRs which do not follow our coding
|
||||
conventions. The easiest way to ensure your PR adheres to those conventions is
|
||||
to use the [citus_indent](https://github.com/citusdata/tools/tree/develop/uncrustify)
|
||||
tool. This tool uses `uncrustify` under the hood.
|
||||
|
||||
```bash
|
||||
# Uncrustify changes the way it formats code every release a bit. To make sure
|
||||
# everyone formats consistently we use version 0.68.1:
|
||||
curl -L https://github.com/uncrustify/uncrustify/archive/uncrustify-0.68.1.tar.gz | tar xz
|
||||
cd uncrustify-uncrustify-0.68.1/
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make -j5
|
||||
sudo make install
|
||||
cd ../..
|
||||
|
||||
git clone https://github.com/citusdata/tools.git
|
||||
cd tools
|
||||
make uncrustify/.install
|
||||
```
|
||||
|
||||
Once you've done that, you can run the `make reindent` command from the top
|
||||
directory to recursively check and correct the style of any source files in the
|
||||
current directory. Under the hood, `make reindent` will run `citus_indent` and
|
||||
some other style corrections for you.
|
||||
|
||||
You can also run the following in the directory of this repository to
|
||||
automatically format all the files that you have changed before committing:
|
||||
|
||||
```bash
|
||||
cat > .git/hooks/pre-commit << __EOF__
|
||||
#!/bin/bash
|
||||
citus_indent --check --diff || { citus_indent --diff; exit 1; }
|
||||
__EOF__
|
||||
chmod +x .git/hooks/pre-commit
|
||||
```
|
||||
|
||||
## Other rules we follow that citus_indent does not enforce
|
||||
|
||||
* We almost always use **CamelCase**, when naming functions, variables etc., **not snake_case**.
|
||||
|
||||
* We also have the habit of using **lowerCamelCase** for some variables named after their type or their function name, as shown in the examples:
|
||||
|
||||
```c
|
||||
bool IsCitusExtensionLoaded = false;
|
||||
|
||||
|
||||
bool
|
||||
IsAlterTableRenameStmt(RenameStmt *renameStmt)
|
||||
{
|
||||
AlterTableCmd *alterTableCommand = NULL;
|
||||
..
|
||||
..
|
||||
|
||||
bool isAlterTableRenameStmt = false;
|
||||
..
|
||||
}
|
||||
```
|
||||
|
||||
* We **start functions with a comment**:
|
||||
|
||||
```c
|
||||
/*
|
||||
* MyNiceFunction <something in present simple tense, e.g., processes / returns / checks / takes X as input / does Y> ..
|
||||
* <some more nice words> ..
|
||||
* <some more nice words> ..
|
||||
*/
|
||||
<static?> <return type>
|
||||
MyNiceFunction(..)
|
||||
{
|
||||
..
|
||||
..
|
||||
}
|
||||
```
|
||||
|
||||
* `#include`s need to be sorted based on the ordering below and then alphabetically, and we should not include what we don't need in a file:
|
||||
|
||||
* System includes (eg. #include<...>)
|
||||
* Postgres.h (eg. #include "postgres.h")
|
||||
* Toplevel imports from postgres, not contained in a directory (eg. #include "miscadmin.h")
|
||||
* General postgres includes (eg. #include "nodes/...")
|
||||
* Toplevel citus includes, not contained in a directory (eg. #include "citus_version.h")
|
||||
* Columnar includes (eg. #include "columnar/...")
|
||||
* Distributed includes (eg. #include "distributed/...")
|
||||
|
||||
* Comments:
|
||||
```c
|
||||
/* single line comments start with a lower-case */
|
||||
|
||||
/*
|
||||
* We start multi-line comments with a capital letter
|
||||
* and keep adding a star to the beginning of each line
|
||||
* until we close the comment with a star and a slash.
|
||||
*/
|
||||
```
|
||||
|
||||
* Order of function implementations and their declarations in a file:
|
||||
|
||||
We define static functions after the functions that call them. For example:
|
||||
|
||||
```c
|
||||
#include<..>
|
||||
#include<..>
|
||||
..
|
||||
..
|
||||
typedef struct
|
||||
{
|
||||
..
|
||||
..
|
||||
} MyNiceStruct;
|
||||
..
|
||||
..
|
||||
PG_FUNCTION_INFO_V1(my_nice_udf1);
|
||||
PG_FUNCTION_INFO_V1(my_nice_udf2);
|
||||
..
|
||||
..
|
||||
// .. somewhere on top of the file …
|
||||
static void MyNiceStaticlyDeclaredFunction1(…);
|
||||
static void MyNiceStaticlyDeclaredFunction2(…);
|
||||
..
|
||||
..
|
||||
|
||||
|
||||
void
|
||||
MyNiceFunctionExternedViaHeaderFile(..)
|
||||
{
|
||||
..
|
||||
..
|
||||
MyNiceStaticlyDeclaredFunction1(..);
|
||||
..
|
||||
..
|
||||
MyNiceStaticlyDeclaredFunction2(..);
|
||||
..
|
||||
}
|
||||
|
||||
..
|
||||
..
|
||||
|
||||
// we define this first because it's called by MyNiceFunctionExternedViaHeaderFile()
|
||||
// before MyNiceStaticlyDeclaredFunction2()
|
||||
static void
|
||||
MyNiceStaticlyDeclaredFunction1(…)
|
||||
{
|
||||
}
|
||||
..
|
||||
..
|
||||
|
||||
// then we define this
|
||||
static void
|
||||
MyNiceStaticlyDeclaredFunction2(…)
|
||||
{
|
||||
}
|
||||
```
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# autogen.sh converts configure.ac to configure and creates
|
||||
# autogen.sh converts configure.in to configure and creates
|
||||
# citus_config.h.in. The resulting files are checked into
|
||||
# the SCM, to avoid everyone needing autoconf installed.
|
||||
|
||||
|
|
54
ci/README.md
|
@ -156,9 +156,9 @@ git merge "community/$PR_BRANCH"
|
|||
familiar with the change.
|
||||
5. You should rerun the `check-merge-to-enterprise` check on
|
||||
`community/$PR_BRANCH`. You can use the "re-run from failed" option in CircleCI.
|
||||
6. You can now merge the PR on community. Be sure to NOT use "squash and merge",
|
||||
6. You can now merge the PR on enterprise. Be sure to NOT use "squash and merge",
|
||||
but instead use the regular "merge commit" mode.
|
||||
7. You can now merge the PR on enterprise. Be sure to NOT use "squash and merge",
|
||||
7. You can now merge the PR on community. Be sure to NOT use "squash and merge",
|
||||
but instead use the regular "merge commit" mode.
|
||||
|
||||
The subsequent PRs on community will be able to pass the
|
||||
|
@ -283,14 +283,6 @@ actually run in CI. This is most commonly forgotten for newly added CI tests
|
|||
that the developer only ran locally. It also checks that all CI scripts have a
|
||||
section in this `README.md` file and that they include `ci/ci_helpers.sh`.
|
||||
|
||||
## `check_migration_files.sh`
|
||||
|
||||
A branch that touches a set of upgrade scripts is also expected to touch
|
||||
corresponding downgrade scripts as well. If this script fails, read the output
|
||||
and make sure you update the downgrade scripts in the printed list. If you
|
||||
really don't need a downgrade to run any SQL, you can write a comment in the
|
||||
file explaining why a downgrade step is not necessary.
|
||||
|
||||
## `disallow_c_comments_in_migrations.sh`
|
||||
|
||||
We do not use C-style comments in migration files as the stripped
|
||||
|
@ -301,18 +293,6 @@ Instead use SQL type comments, i.e:
|
|||
```
|
||||
See [#3115](https://github.com/citusdata/citus/pull/3115) for more info.
|
||||
|
||||
## `disallow_hash_comments_in_spec_files.sh`
|
||||
|
||||
We do not use comments starting with # in spec files because it creates errors
|
||||
from the C preprocessor, which expects directives after this character.
|
||||
Instead use C type comments, i.e:
|
||||
```
|
||||
// this is a single line comment
|
||||
|
||||
/*
|
||||
* this is a multi line comment
|
||||
*/
|
||||
```
|
||||
|
||||
## `disallow_long_changelog_entries.sh`
|
||||
|
||||
|
@ -371,32 +351,10 @@ This was deemed to be error prone and not worth the effort.
|
|||
|
||||
This script checks and fixes issues with `.gitignore` rules:
|
||||
|
||||
1. Makes sure git ignores the `.sql` files and expected output files that are generated
|
||||
from `.source` template files. If you created or deleted a `.source` file in a commit,
|
||||
git ignore rules should be updated to reflect this change.
|
||||
|
||||
1. Makes sure we do not commit any generated files that should be ignored. If there is an
|
||||
2. Makes sure we do not commit any generated files that should be ignored. If there is an
|
||||
ignored file in the git tree, the user is expected to review the files that are removed
|
||||
from the git tree and commit them.
|
||||
|
||||
## `check_gucs_are_alphabetically_sorted.sh`
|
||||
|
||||
This script checks the order of the GUCs defined in `shared_library_init.c`.
|
||||
To solve this failure, please check `shared_library_init.c` and make sure that the GUC
|
||||
definitions are in alphabetical order.
|
||||
|
||||
## `print_stack_trace.sh`
|
||||
|
||||
This script prints stack traces for failed tests, if they left core files.
|
||||
|
||||
## `sort_and_group_includes.sh`
|
||||
|
||||
This script checks and fixes issues with include grouping and sorting in C files.
|
||||
|
||||
Includes are grouped in the following groups:
|
||||
- System includes (eg. `#include <math>`)
|
||||
- Postgres.h include (eg. `#include "postgres.h"`)
|
||||
- Toplevel postgres includes (includes not in a directory eg. `#include "miscadmin.h"`)
|
||||
- Postgres includes in a directory (eg. `#include "catalog/pg_type.h"`)
|
||||
- Toplevel citus includes (includes not in a directory eg. `#include "pg_version_constants.h"`)
|
||||
- Columnar includes (eg. `#include "columnar/columnar.h"`)
|
||||
- Distributed includes (eg. `#include "distributed/maintenanced.h"`)
|
||||
|
||||
Within every group the include lines are sorted alphabetically.
|
||||
|
|
|
@ -15,6 +15,9 @@ PG_MAJOR=${PG_MAJOR:?please provide the postgres major version}
|
|||
codename=${VERSION#*(}
|
||||
codename=${codename%)*}
|
||||
|
||||
# get project name from the CIRCLE_PROJECT_REPONAME environment variable
|
||||
project="${CIRCLE_PROJECT_REPONAME}"
|
||||
|
||||
# we'll do everything with absolute paths
|
||||
basedir="$(pwd)"
|
||||
|
||||
|
@ -25,7 +28,7 @@ build_ext() {
|
|||
pg_major="$1"
|
||||
|
||||
builddir="${basedir}/build-${pg_major}"
|
||||
echo "Beginning build for PostgreSQL ${pg_major}..." >&2
|
||||
echo "Beginning build of ${project} for PostgreSQL ${pg_major}..." >&2
|
||||
|
||||
# do everything in a subdirectory to avoid clutter in current directory
|
||||
mkdir -p "${builddir}" && cd "${builddir}"
|
||||
|
|
|
@ -14,8 +14,8 @@ ci_scripts=$(
|
|||
grep -v -E '^(ci_helpers.sh|fix_style.sh)$'
|
||||
)
|
||||
for script in $ci_scripts; do
|
||||
if ! grep "\\bci/$script\\b" -r .github > /dev/null; then
|
||||
echo "ERROR: CI script with name \"$script\" is not actually used in .github folder"
|
||||
if ! grep "\\bci/$script\\b" .circleci/config.yml > /dev/null; then
|
||||
echo "ERROR: CI script with name \"$script\" is not actually used in .circleci/config.yml"
|
||||
exit 1
|
||||
fi
|
||||
if ! grep "^## \`$script\`\$" ci/README.md > /dev/null; then
|
||||
|
|
|
@ -7,12 +7,13 @@ source ci/ci_helpers.sh
|
|||
|
||||
cd src/test/regress
|
||||
|
||||
# 1. Find all *.sql and *.spec files in the sql, and spec directories
|
||||
# 1. Find all *.sql *.spec and *.source files in the sql, spec and input
|
||||
# directories
|
||||
# 2. Strip the extension and the directory
|
||||
# 3. Ignore names that end with .include, those files are meant to be in a C
|
||||
# preprocessor #include statement. They should not be in schedules.
|
||||
test_names=$(
|
||||
find sql spec -iname "*.sql" -o -iname "*.spec" |
|
||||
find sql spec input -iname "*.sql" -o -iname "*.spec" -o -iname "*.source" |
|
||||
sed -E 's#^\w+/([^/]+)\.[^.]+$#\1#g' |
|
||||
grep -v '.include$'
|
||||
)
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Testing this script locally requires you to set the following environment
|
||||
# variables:
|
||||
# CIRCLE_BRANCH, GIT_USERNAME and GIT_TOKEN
|
||||
|
||||
# fail if trying to reference a variable that is not set.
|
||||
set -u
|
||||
# exit immediately if a command fails
|
||||
set -e
|
||||
# Fail on pipe failures
|
||||
set -o pipefail
|
||||
|
||||
PR_BRANCH="${CIRCLE_BRANCH}"
|
||||
ENTERPRISE_REMOTE="https://${GIT_USERNAME}:${GIT_TOKEN}@github.com/citusdata/citus-enterprise"
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
# List executed commands. This is done so debugging this script is easier when
|
||||
# it fails. It's explicitly done after git remote add so username and password
|
||||
# are not shown in CI output (even though it's also filtered out by CircleCI)
|
||||
set -x
|
||||
|
||||
check_compile () {
|
||||
echo "INFO: checking if merged code can be compiled"
|
||||
./configure --without-libcurl
|
||||
make -j10
|
||||
}
|
||||
|
||||
# Clone current git repo (which should be community) to a temporary working
|
||||
# directory and go there
|
||||
GIT_DIR_ROOT="$(git rev-parse --show-toplevel)"
|
||||
TMP_GIT_DIR="$(mktemp --directory -t citus-merge-check.XXXXXXXXX)"
|
||||
git clone "$GIT_DIR_ROOT" "$TMP_GIT_DIR"
|
||||
cd "$TMP_GIT_DIR"
|
||||
|
||||
# Fails in CI without this
|
||||
git config user.email "citus-bot@microsoft.com"
|
||||
git config user.name "citus bot"
|
||||
|
||||
# Disable "set -x" temporarily, because $ENTERPRISE_REMOTE contains passwords
|
||||
{ set +x ; } 2> /dev/null
|
||||
git remote add enterprise "$ENTERPRISE_REMOTE"
|
||||
set -x
|
||||
|
||||
git remote set-url --push enterprise no-pushing
|
||||
|
||||
# Fetch enterprise-master
|
||||
git fetch enterprise enterprise-master
|
||||
|
||||
|
||||
git checkout "enterprise/enterprise-master"
|
||||
|
||||
if git merge --no-commit "origin/$PR_BRANCH"; then
|
||||
echo "INFO: community PR branch could be merged into enterprise-master"
|
||||
# check that we can compile after the merge
|
||||
if check_compile; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "WARN: Failed to compile after community PR branch was merged into enterprise"
|
||||
fi
|
||||
|
||||
# undo partial merge
|
||||
git merge --abort
|
||||
|
||||
# If we have a conflict on enterprise merge on the master branch, we have a problem.
|
||||
# Provide an error message to indicate that enterprise merge is needed.
|
||||
if [[ $PR_BRANCH = master ]]; then
|
||||
echo "ERROR: Master branch has merge conlicts with enterprise-master."
|
||||
echo "Try re-running this job if you merged community PR before enterprise PR. Otherwise conflicts need to be resolved as a separate PR on enterprise."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! git fetch enterprise "$PR_BRANCH" ; then
|
||||
echo "ERROR: enterprise/$PR_BRANCH was not found and community PR branch could not be merged into enterprise-master"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Show the top commit of the enterprise PR branch to make debugging easier
|
||||
git log -n 1 "enterprise/$PR_BRANCH"
|
||||
|
||||
# Check that this branch contains the top commit of the current community PR
|
||||
# branch. If it does not it means it's not up to date with the current PR, so
|
||||
# the enterprise branch should be updated.
|
||||
if ! git merge-base --is-ancestor "origin/$PR_BRANCH" "enterprise/$PR_BRANCH" ; then
|
||||
echo "ERROR: enterprise/$PR_BRANCH is not up to date with community PR branch"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Now check if we can merge the enterprise PR into enterprise-master without
|
||||
# issues.
|
||||
git merge --no-commit "enterprise/$PR_BRANCH"
|
||||
# check that we can compile after the merge
|
||||
check_compile
|
|
@ -1,25 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
# Find the line that exactly matches "RegisterCitusConfigVariables(void)" in
|
||||
# shared_library_init.c. grep command returns something like
|
||||
# "934:RegisterCitusConfigVariables(void)" and we extract the line number
|
||||
# with cut.
|
||||
RegisterCitusConfigVariables_begin_linenumber=$(grep -n "^RegisterCitusConfigVariables(void)$" src/backend/distributed/shared_library_init.c | cut -d: -f1)
|
||||
|
||||
# Consider the lines starting from $RegisterCitusConfigVariables_begin_linenumber,
|
||||
# grep the first line that starts with "}" and extract the line number with cut
|
||||
# as in the previous step.
|
||||
RegisterCitusConfigVariables_length=$(tail -n +$RegisterCitusConfigVariables_begin_linenumber src/backend/distributed/shared_library_init.c | grep -n -m 1 "^}$" | cut -d: -f1)
|
||||
|
||||
# extract the function definition of RegisterCitusConfigVariables into a temp file
|
||||
tail -n +$RegisterCitusConfigVariables_begin_linenumber src/backend/distributed/shared_library_init.c | head -n $(($RegisterCitusConfigVariables_length)) > RegisterCitusConfigVariables_func_def.out
|
||||
|
||||
# extract citus gucs in the form of <tab><tab>"citus.X"
|
||||
grep -P "^[\t][\t]\"citus\.[a-zA-Z_0-9]+\"" RegisterCitusConfigVariables_func_def.out > gucs.out
|
||||
LC_COLLATE=C sort -c gucs.out
|
||||
rm gucs.out
|
||||
rm RegisterCitusConfigVariables_func_def.out
|
|
@ -1,33 +0,0 @@
|
|||
#! /bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
# This file checks for the existence of downgrade scripts for every upgrade script that is changed in the branch.
|
||||
|
||||
# create list of migration files for upgrades
|
||||
upgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/citus--.*sql" || exit 0 ; })
|
||||
downgrade_files=$(git diff --name-only origin/main | { grep "src/backend/distributed/sql/downgrades/citus--.*sql" || exit 0 ; })
|
||||
ret_value=0
|
||||
|
||||
for file in $upgrade_files
|
||||
do
|
||||
# There should always be 2 matches, and no need to avoid splitting here
|
||||
# shellcheck disable=SC2207
|
||||
versions=($(grep --only-matching --extended-regexp "[0-9]+\.[0-9]+[-.][0-9]+" <<< "$file"))
|
||||
|
||||
from_version=${versions[0]};
|
||||
to_version=${versions[1]};
|
||||
|
||||
downgrade_migration_file="src/backend/distributed/sql/downgrades/citus--$to_version--$from_version.sql"
|
||||
|
||||
# check for the existence of migration scripts
|
||||
if [[ $(grep --line-regexp --count "$downgrade_migration_file" <<< "$downgrade_files") == 0 ]]
|
||||
then
|
||||
echo "$file is updated, but $downgrade_migration_file is not updated in branch"
|
||||
ret_value=1
|
||||
fi
|
||||
done
|
||||
|
||||
exit $ret_value;
|
|
@ -1,10 +1,6 @@
|
|||
#! /bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# make ** match all directories and subdirectories
|
||||
shopt -s globstar
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
|
@ -16,17 +12,17 @@ source ci/ci_helpers.sh
|
|||
# and reusing them if needed. GNU sed unfortunately does not support lookaround assertions.
|
||||
|
||||
# /* -> --
|
||||
find src/backend/{distributed,columnar}/sql/**/*.sql -print0 | xargs -0 sed -i 's#/\*#--#g'
|
||||
find src/backend/distributed/sql/*.sql -print0 | xargs -0 sed -i 's#/\*#--#g'
|
||||
|
||||
# */ -> `` (empty string)
|
||||
# remove all whitespaces immediately before the match
|
||||
find src/backend/{distributed,columnar}/sql/**/*.sql -print0 | xargs -0 sed -i 's#\s*\*/\s*##g'
|
||||
find src/backend/distributed/sql/*.sql -print0 | xargs -0 sed -i 's#\s*\*/\s*##g'
|
||||
|
||||
# * -> --
|
||||
# keep the indentation
|
||||
# allow only whitespaces before the match
|
||||
find src/backend/{distributed,columnar}/sql/**/*.sql -print0 | xargs -0 sed -i 's#^\(\s*\) \*#\1--#g'
|
||||
find src/backend/distributed/sql/*.sql -print0 | xargs -0 sed -i 's#^\(\s*\) \*#\1--#g'
|
||||
|
||||
# // -> --
|
||||
# do not touch http:// or similar by allowing only whitespaces before //
|
||||
find src/backend/{distributed,columnar}/sql/**/*.sql -print0 | xargs -0 sed -i 's#^\(\s*\)//#\1--#g'
|
||||
find src/backend/distributed/sql/*.sql -print0 | xargs -0 sed -i 's#^\(\s*\)//#\1--#g'
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
#! /bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
# We do not use comments starting with # in spec files because it creates warnings from
|
||||
# preprocessor that expects directives after this character.
|
||||
|
||||
# `# ` -> `// `
|
||||
find src/test/regress/spec/*.spec -print0 | xargs -0 sed -i 's!# !// !g'
|
|
@ -1,12 +1,27 @@
|
|||
#! /bin/bash
|
||||
# shellcheck disable=SC2012
|
||||
|
||||
set -euo pipefail
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
# We list all the .source files in alphabetical order, and do a substitution
|
||||
# before writing the resulting file names that are created by those templates in
|
||||
# relevant .gitignore files
|
||||
#
|
||||
# 1. Capture the file name without the .source extension
|
||||
# 2. Add the desired extension at the end
|
||||
# 3. Add a / character at the beginning of each line to conform to .gitignore file format
|
||||
#
|
||||
# e.g. multi_copy.source -> /multi_copy.sql
|
||||
ls -1 src/test/regress/input | sed -E "s#(.*)\.source#/\1.sql#" > src/test/regress/sql/.gitignore
|
||||
|
||||
# e.g. multi_copy.source -> /multi_copy.out
|
||||
ls -1 src/test/regress/output | sed -E "s#(.*)\.source#/\1.out#" > src/test/regress/expected/.gitignore
|
||||
|
||||
# Remove all the ignored files from git tree, and error out
|
||||
# find all ignored files in git tree, and use quotation marks to prevent word splitting on filenames with spaces in them
|
||||
# NOTE: Option --cached is needed to avoid a bug in git ls-files command.
|
||||
ignored_lines_in_git_tree=$(git ls-files --ignored --cached --exclude-standard | sed 's/.*/"&"/')
|
||||
ignored_lines_in_git_tree=$(git ls-files --ignored --exclude-standard | sed 's/.*/"&"/')
|
||||
|
||||
if [[ -n $ignored_lines_in_git_tree ]]
|
||||
then
|
||||
|
|
|
@ -9,14 +9,9 @@ cidir="${0%/*}"
|
|||
cd ${cidir}/..
|
||||
|
||||
citus_indent . --quiet
|
||||
black . --quiet
|
||||
isort . --quiet
|
||||
ci/editorconfig.sh
|
||||
ci/remove_useless_declarations.sh
|
||||
ci/disallow_c_comments_in_migrations.sh
|
||||
ci/disallow_hash_comments_in_spec_files.sh
|
||||
ci/disallow_long_changelog_entries.sh
|
||||
ci/normalize_expected.sh
|
||||
ci/fix_gitignore.sh
|
||||
ci/print_stack_trace.sh
|
||||
ci/sort_and_group_includes.sh
|
||||
|
|
|
@ -1,157 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
easy command line to run against all citus-style checked files:
|
||||
|
||||
$ git ls-files \
|
||||
| git check-attr --stdin citus-style \
|
||||
| grep 'citus-style: set' \
|
||||
| awk '{print $1}' \
|
||||
| cut -d':' -f1 \
|
||||
| xargs -n1 ./ci/include_grouping.py
|
||||
"""
|
||||
|
||||
import collections
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def main(args):
|
||||
if len(args) < 2:
|
||||
print("Usage: include_grouping.py <file>")
|
||||
return
|
||||
|
||||
file = args[1]
|
||||
if not os.path.isfile(file):
|
||||
sys.exit(f"File '{file}' does not exist")
|
||||
|
||||
with open(file, "r") as in_file:
|
||||
with open(file + ".tmp", "w") as out_file:
|
||||
includes = []
|
||||
skipped_lines = []
|
||||
|
||||
# This calls print_sorted_includes on a set of consecutive #include lines.
|
||||
# This implicitly keeps separation of any #include lines that are contained in
|
||||
# an #ifdef, because it will order the #include lines inside and after the
|
||||
# #ifdef completely separately.
|
||||
for line in in_file:
|
||||
# if a line starts with #include we don't want to print it yet, instead we
|
||||
# want to collect all consecutive #include lines
|
||||
if line.startswith("#include"):
|
||||
includes.append(line)
|
||||
skipped_lines = []
|
||||
continue
|
||||
|
||||
# if we have collected any #include lines, we want to print them sorted
|
||||
# before printing the current line. However, if the current line is empty
|
||||
# we want to perform a lookahead to see if the next line is an #include.
|
||||
# To maintain any separation between #include lines and their subsequent
|
||||
# lines we keep track of all lines we have skipped in between.
|
||||
if len(includes) > 0:
|
||||
if len(line.strip()) == 0:
|
||||
skipped_lines.append(line)
|
||||
continue
|
||||
|
||||
# we have includes that need to be grouped before printing the current
|
||||
# line.
|
||||
print_sorted_includes(includes, file=out_file)
|
||||
includes = []
|
||||
|
||||
# print any skipped lines
|
||||
print("".join(skipped_lines), end="", file=out_file)
|
||||
skipped_lines = []
|
||||
|
||||
print(line, end="", file=out_file)
|
||||
|
||||
# move out_file to file
|
||||
os.rename(file + ".tmp", file)
|
||||
|
||||
|
||||
def print_sorted_includes(includes, file=sys.stdout):
|
||||
default_group_key = 1
|
||||
groups = collections.defaultdict(set)
|
||||
|
||||
# define the groups that we separate correctly. The matchers are tested in the order
|
||||
# of their priority field. The first matcher that matches the include is used to
|
||||
# assign the include to a group.
|
||||
# The groups are printed in the order of their group_key.
|
||||
matchers = [
|
||||
{
|
||||
"name": "system includes",
|
||||
"matcher": lambda x: x.startswith("<"),
|
||||
"group_key": -2,
|
||||
"priority": 0,
|
||||
},
|
||||
{
|
||||
"name": "toplevel postgres includes",
|
||||
"matcher": lambda x: "/" not in x,
|
||||
"group_key": 0,
|
||||
"priority": 9,
|
||||
},
|
||||
{
|
||||
"name": "postgres.h",
|
||||
"matcher": lambda x: x.strip() in ['"postgres.h"'],
|
||||
"group_key": -1,
|
||||
"priority": -1,
|
||||
},
|
||||
{
|
||||
"name": "toplevel citus inlcudes",
|
||||
"matcher": lambda x: x.strip()
|
||||
in [
|
||||
'"citus_version.h"',
|
||||
'"pg_version_compat.h"',
|
||||
'"pg_version_constants.h"',
|
||||
],
|
||||
"group_key": 3,
|
||||
"priority": 0,
|
||||
},
|
||||
{
|
||||
"name": "columnar includes",
|
||||
"matcher": lambda x: x.startswith('"columnar/'),
|
||||
"group_key": 4,
|
||||
"priority": 1,
|
||||
},
|
||||
{
|
||||
"name": "distributed includes",
|
||||
"matcher": lambda x: x.startswith('"distributed/'),
|
||||
"group_key": 5,
|
||||
"priority": 1,
|
||||
},
|
||||
]
|
||||
matchers.sort(key=lambda x: x["priority"])
|
||||
|
||||
# throughout our codebase we have some includes where either postgres or citus
|
||||
# includes are wrongfully included with the syntax for system includes. Before we
|
||||
# try to match those we will change the <> to "" to make them match our system. This
|
||||
# will also rewrite the include to the correct syntax.
|
||||
common_system_include_error_prefixes = ["<nodes/", "<distributed/"]
|
||||
|
||||
# assign every include to a group
|
||||
for include in includes:
|
||||
# extract the group key from the include
|
||||
include_content = include.split(" ")[1]
|
||||
|
||||
# fix common system includes which are secretly postgres or citus includes
|
||||
for common_prefix in common_system_include_error_prefixes:
|
||||
if include_content.startswith(common_prefix):
|
||||
include_content = '"' + include_content.strip()[1:-1] + '"'
|
||||
include = include.split(" ")[0] + " " + include_content + "\n"
|
||||
break
|
||||
|
||||
group_key = default_group_key
|
||||
for matcher in matchers:
|
||||
if matcher["matcher"](include_content):
|
||||
group_key = matcher["group_key"]
|
||||
break
|
||||
|
||||
groups[group_key].add(include)
|
||||
|
||||
# iterate over all groups in the natural order of its keys
|
||||
for i, group in enumerate(sorted(groups.items())):
|
||||
if i > 0:
|
||||
print(file=file)
|
||||
includes = group[1]
|
||||
print("".join(sorted(includes)), end="", file=file)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
|
@ -1,25 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
# find all core files
|
||||
core_files=( $(find . -type f -regex .*core.*\d*.*postgres) )
|
||||
if [ ${#core_files[@]} -gt 0 ]; then
|
||||
# print stack traces for the core files
|
||||
for core_file in "${core_files[@]}"
|
||||
do
|
||||
# set print frame-arguments all: show all scalars + structures in the frame
|
||||
# set print pretty on: show structures in indented mode
|
||||
# set print addr off: do not show pointer address
|
||||
# thread apply all bt full: show stack traces for all threads
|
||||
gdb --batch \
|
||||
-ex "set print frame-arguments all" \
|
||||
-ex "set print pretty on" \
|
||||
-ex "set print addr off" \
|
||||
-ex "thread apply all bt full" \
|
||||
postgres "${core_file}"
|
||||
done
|
||||
fi
|
|
@ -1,12 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
# shellcheck disable=SC1091
|
||||
source ci/ci_helpers.sh
|
||||
|
||||
git ls-files \
|
||||
| git check-attr --stdin citus-style \
|
||||
| grep 'citus-style: set' \
|
||||
| awk '{print $1}' \
|
||||
| cut -d':' -f1 \
|
||||
| xargs -n1 ./ci/include_grouping.py
|
Before Width: | Height: | Size: 94 KiB After Width: | Height: | Size: 94 KiB |
Before Width: | Height: | Size: 22 KiB After Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
|
@ -10,7 +10,7 @@
|
|||
# argument (other than "yes/no"), etc.
|
||||
#
|
||||
# The point of this implementation is to reduce code size and
|
||||
# redundancy in configure.ac and to improve robustness and consistency
|
||||
# redundancy in configure.in and to improve robustness and consistency
|
||||
# in the option evaluation code.
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for Citus 13.2devel.
|
||||
# Generated by GNU Autoconf 2.69 for Citus 10.1.6.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
|
@ -579,8 +579,8 @@ MAKEFLAGS=
|
|||
# Identity of this package.
|
||||
PACKAGE_NAME='Citus'
|
||||
PACKAGE_TARNAME='citus'
|
||||
PACKAGE_VERSION='13.2devel'
|
||||
PACKAGE_STRING='Citus 13.2devel'
|
||||
PACKAGE_VERSION='10.1.6'
|
||||
PACKAGE_STRING='Citus 10.1.6'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
|
@ -644,7 +644,6 @@ LDFLAGS
|
|||
CFLAGS
|
||||
CC
|
||||
vpath_build
|
||||
with_pg_version_check
|
||||
PATH
|
||||
PG_CONFIG
|
||||
FLEX
|
||||
|
@ -693,7 +692,6 @@ ac_subst_files=''
|
|||
ac_user_opts='
|
||||
enable_option_checking
|
||||
with_extra_version
|
||||
with_pg_version_check
|
||||
enable_coverage
|
||||
with_libcurl
|
||||
with_reports_hostname
|
||||
|
@ -1262,7 +1260,7 @@ if test "$ac_init_help" = "long"; then
|
|||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures Citus 13.2devel to adapt to many kinds of systems.
|
||||
\`configure' configures Citus 10.1.6 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
|
@ -1324,7 +1322,7 @@ fi
|
|||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of Citus 13.2devel:";;
|
||||
short | recursive ) echo "Configuration of Citus 10.1.6:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
|
@ -1339,8 +1337,6 @@ Optional Packages:
|
|||
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
|
||||
--with-extra-version=STRING
|
||||
append STRING to version
|
||||
--without-pg-version-check
|
||||
do not check postgres version during configure
|
||||
--without-libcurl do not use libcurl for anonymous statistics
|
||||
collection
|
||||
--with-reports-hostname=HOSTNAME
|
||||
|
@ -1429,7 +1425,7 @@ fi
|
|||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
Citus configure 13.2devel
|
||||
Citus configure 10.1.6
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
|
@ -1912,7 +1908,7 @@ cat >config.log <<_ACEOF
|
|||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by Citus $as_me 13.2devel, which was
|
||||
It was created by Citus $as_me 10.1.6, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
|
@ -2559,36 +2555,7 @@ if test -z "$version_num"; then
|
|||
as_fn_error $? "Could not detect PostgreSQL version from pg_config." "$LINENO" 5
|
||||
fi
|
||||
|
||||
|
||||
|
||||
|
||||
# Check whether --with-pg-version-check was given.
|
||||
if test "${with_pg_version_check+set}" = set; then :
|
||||
withval=$with_pg_version_check;
|
||||
case $withval in
|
||||
yes)
|
||||
:
|
||||
;;
|
||||
no)
|
||||
:
|
||||
;;
|
||||
*)
|
||||
as_fn_error $? "no argument expected for --with-pg-version-check option" "$LINENO" 5
|
||||
;;
|
||||
esac
|
||||
|
||||
else
|
||||
with_pg_version_check=yes
|
||||
|
||||
fi
|
||||
|
||||
|
||||
|
||||
|
||||
if test "$with_pg_version_check" = no; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: building against PostgreSQL $version_num (skipped compatibility check)" >&5
|
||||
$as_echo "$as_me: building against PostgreSQL $version_num (skipped compatibility check)" >&6;}
|
||||
elif test "$version_num" != '15' -a "$version_num" != '16' -a "$version_num" != '17'; then
|
||||
if test "$version_num" != '12' -a "$version_num" != '13'; then
|
||||
as_fn_error $? "Citus is not compatible with the detected PostgreSQL version ${version_num}." "$LINENO" 5
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: building against PostgreSQL $version_num" >&5
|
||||
|
@ -5393,7 +5360,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by Citus $as_me 13.2devel, which was
|
||||
This file was extended by Citus $as_me 10.1.6, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
|
@ -5455,7 +5422,7 @@ _ACEOF
|
|||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
Citus config.status 13.2devel
|
||||
Citus config.status 10.1.6
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
# everyone needing autoconf installed, the resulting files are checked
|
||||
# into the SCM.
|
||||
|
||||
AC_INIT([Citus], [13.2devel])
|
||||
AC_INIT([Citus], [10.1.6])
|
||||
AC_COPYRIGHT([Copyright (c) Citus Data, Inc.])
|
||||
|
||||
# we'll need sed and awk for some of the version commands
|
||||
|
@ -74,13 +74,7 @@ if test -z "$version_num"; then
|
|||
AC_MSG_ERROR([Could not detect PostgreSQL version from pg_config.])
|
||||
fi
|
||||
|
||||
PGAC_ARG_BOOL(with, pg-version-check, yes,
|
||||
[do not check postgres version during configure])
|
||||
AC_SUBST(with_pg_version_check)
|
||||
|
||||
if test "$with_pg_version_check" = no; then
|
||||
AC_MSG_NOTICE([building against PostgreSQL $version_num (skipped compatibility check)])
|
||||
elif test "$version_num" != '15' -a "$version_num" != '16' -a "$version_num" != '17'; then
|
||||
if test "$version_num" != '12' -a "$version_num" != '13'; then
|
||||
AC_MSG_ERROR([Citus is not compatible with the detected PostgreSQL version ${version_num}.])
|
||||
else
|
||||
AC_MSG_NOTICE([building against PostgreSQL $version_num])
|
Before Width: | Height: | Size: 95 KiB |
Before Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 102 KiB |
Before Width: | Height: | Size: 29 KiB |
Before Width: | Height: | Size: 69 KiB |
Before Width: | Height: | Size: 111 KiB |
Before Width: | Height: | Size: 12 KiB |
Before Width: | Height: | Size: 168 KiB |
|
@ -1,40 +0,0 @@
|
|||
[tool.isort]
|
||||
profile = 'black'
|
||||
|
||||
[tool.black]
|
||||
include = '(src/test/regress/bin/diff-filter|\.pyi?|\.ipynb)$'
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = [
|
||||
"--import-mode=importlib",
|
||||
"--showlocals",
|
||||
"--tb=short",
|
||||
]
|
||||
pythonpath = 'src/test/regress/citus_tests'
|
||||
asyncio_mode = 'auto'
|
||||
|
||||
# Make test discovery quicker from the root dir of the repo
|
||||
testpaths = ['src/test/regress/citus_tests/test']
|
||||
|
||||
# Make test discovery quicker from other directories than root directory
|
||||
norecursedirs = [
|
||||
'*.egg',
|
||||
'.*',
|
||||
'build',
|
||||
'venv',
|
||||
'ci',
|
||||
'vendor',
|
||||
'backend',
|
||||
'bin',
|
||||
'include',
|
||||
'tmp_*',
|
||||
'results',
|
||||
'expected',
|
||||
'sql',
|
||||
'spec',
|
||||
'data',
|
||||
'__pycache__',
|
||||
]
|
||||
|
||||
# Don't find files with test at the end such as run_test.py
|
||||
python_files = ['test_*.py']
|
|
@ -16,6 +16,7 @@ README.* conflict-marker-size=32
|
|||
|
||||
# Test output files that contain extra whitespace
|
||||
*.out -whitespace
|
||||
src/test/regress/output/*.source -whitespace
|
||||
|
||||
# These files are maintained or generated elsewhere. We take them as is.
|
||||
configure -whitespace
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
# The directory used to store columnar sql files after pre-processing them
|
||||
# with 'cpp' in build-time, see src/backend/columnar/Makefile.
|
||||
/build/
|
|
@ -1,60 +0,0 @@
|
|||
citus_subdir = src/backend/columnar
|
||||
citus_top_builddir = ../../..
|
||||
safestringlib_srcdir = $(citus_abs_top_srcdir)/vendor/safestringlib
|
||||
SUBDIRS = . safeclib
|
||||
SUBDIRS +=
|
||||
ENSURE_SUBDIRS_EXIST := $(shell mkdir -p $(SUBDIRS))
|
||||
OBJS += \
|
||||
$(patsubst $(citus_abs_srcdir)/%.c,%.o,$(foreach dir,$(SUBDIRS), $(sort $(wildcard $(citus_abs_srcdir)/$(dir)/*.c))))
|
||||
|
||||
MODULE_big = citus_columnar
|
||||
EXTENSION = citus_columnar
|
||||
|
||||
template_sql_files = $(patsubst $(citus_abs_srcdir)/%,%,$(wildcard $(citus_abs_srcdir)/sql/*.sql))
|
||||
template_downgrade_sql_files = $(patsubst $(citus_abs_srcdir)/sql/downgrades/%,%,$(wildcard $(citus_abs_srcdir)/sql/downgrades/*.sql))
|
||||
generated_sql_files = $(patsubst %,$(citus_abs_srcdir)/build/%,$(template_sql_files))
|
||||
generated_downgrade_sql_files += $(patsubst %,$(citus_abs_srcdir)/build/sql/%,$(template_downgrade_sql_files))
|
||||
|
||||
DATA_built = $(generated_sql_files)
|
||||
|
||||
PG_CPPFLAGS += -I$(libpq_srcdir) -I$(safestringlib_srcdir)/include
|
||||
|
||||
include $(citus_top_builddir)/Makefile.global
|
||||
|
||||
SQL_DEPDIR=.deps/sql
|
||||
SQL_BUILDDIR=build/sql
|
||||
|
||||
$(generated_sql_files): $(citus_abs_srcdir)/build/%: %
|
||||
@mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR)
|
||||
@# -MF is used to store dependency files(.Po) in another directory for separation
|
||||
@# -MT is used to change the target of the rule emitted by dependency generation.
|
||||
@# -P is used to inhibit generation of linemarkers in the output from the preprocessor.
|
||||
@# -undef is used to not predefine any system-specific or GCC-specific macros.
|
||||
@# `man cpp` for further information
|
||||
cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
|
||||
|
||||
$(generated_downgrade_sql_files): $(citus_abs_srcdir)/build/sql/%: sql/downgrades/%
|
||||
@mkdir -p $(citus_abs_srcdir)/$(SQL_DEPDIR) $(citus_abs_srcdir)/$(SQL_BUILDDIR)
|
||||
@# -MF is used to store dependency files(.Po) in another directory for separation
|
||||
@# -MT is used to change the target of the rule emitted by dependency generation.
|
||||
@# -P is used to inhibit generation of linemarkers in the output from the preprocessor.
|
||||
@# -undef is used to not predefine any system-specific or GCC-specific macros.
|
||||
@# `man cpp` for further information
|
||||
cd $(citus_abs_srcdir) && cpp -undef -w -P -MMD -MP -MF$(SQL_DEPDIR)/$(*F).Po -MT$@ $< > $@
|
||||
|
||||
.PHONY: install install-downgrades install-all
|
||||
|
||||
cleanup-before-install:
|
||||
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar.control
|
||||
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/columnar--*
|
||||
rm -f $(DESTDIR)$(datadir)/$(datamoduledir)/citus_columnar--*
|
||||
|
||||
install: cleanup-before-install
|
||||
|
||||
# install and install-downgrades should be run sequentially
|
||||
install-all: install
|
||||
$(MAKE) install-downgrades
|
||||
|
||||
install-downgrades: $(generated_downgrade_sql_files)
|
||||
$(INSTALL_DATA) $(generated_downgrade_sql_files) '$(DESTDIR)$(datadir)/$(datamoduledir)/'
|
||||
|
|
@ -41,7 +41,7 @@ Benefits of Citus Columnar over cstore_fdw:
|
|||
* Append-only (no ``UPDATE``/``DELETE`` support)
|
||||
* No space reclamation (e.g. rolled-back transactions may still
|
||||
consume disk space)
|
||||
* No bitmap index scans
|
||||
* No index support, index scans, or bitmap index scans
|
||||
* No tidscans
|
||||
* No sample scans
|
||||
* No TOAST support (large values supported inline)
|
||||
|
@ -52,11 +52,13 @@ Benefits of Citus Columnar over cstore_fdw:
|
|||
... FOR UPDATE``)
|
||||
* No support for serializable isolation level
|
||||
* Support for PostgreSQL server versions 12+ only
|
||||
* No support for foreign keys
|
||||
* No support for foreign keys, unique constraints, or exclusion
|
||||
constraints
|
||||
* No support for logical decoding
|
||||
* No support for intra-node parallel scans
|
||||
* No support for ``AFTER ... FOR EACH ROW`` triggers
|
||||
* No `UNLOGGED` columnar tables
|
||||
* No `TEMPORARY` columnar tables
|
||||
|
||||
Future iterations will incrementally lift the limitations listed above.
|
||||
|
||||
|
@ -89,25 +91,38 @@ data.
|
|||
Set options using:
|
||||
|
||||
```sql
|
||||
ALTER TABLE my_columnar_table SET
|
||||
(columnar.compression = none, columnar.stripe_row_limit = 10000);
|
||||
alter_columnar_table_set(
|
||||
relid REGCLASS,
|
||||
chunk_group_row_limit INT4 DEFAULT NULL,
|
||||
stripe_row_limit INT4 DEFAULT NULL,
|
||||
compression NAME DEFAULT NULL,
|
||||
compression_level INT4)
|
||||
```
|
||||
|
||||
For example:
|
||||
|
||||
```sql
|
||||
SELECT alter_columnar_table_set(
|
||||
'my_columnar_table',
|
||||
compression => 'none',
|
||||
stripe_row_limit => 10000);
|
||||
```
|
||||
|
||||
The following options are available:
|
||||
|
||||
* **columnar.compression**: `[none|pglz|zstd|lz4|lz4hc]` - set the compression type
|
||||
* **compression**: `[none|pglz|zstd|lz4|lz4hc]` - set the compression type
|
||||
for _newly-inserted_ data. Existing data will not be
|
||||
recompressed/decompressed. The default value is `zstd` (if support
|
||||
has been compiled in).
|
||||
* **columnar.compression_level**: ``<integer>`` - Sets compression level. Valid
|
||||
* **compression_level**: ``<integer>`` - Sets compression level. Valid
|
||||
settings are from 1 through 19. If the compression method does not
|
||||
support the level chosen, the closest level will be selected
|
||||
instead.
|
||||
* **columnar.stripe_row_limit**: ``<integer>`` - the maximum number of rows per
|
||||
* **stripe_row_limit**: ``<integer>`` - the maximum number of rows per
|
||||
stripe for _newly-inserted_ data. Existing stripes of data will not
|
||||
be changed and may have more rows than this maximum value. The
|
||||
default value is `150000`.
|
||||
* **columnar.chunk_group_row_limit**: ``<integer>`` - the maximum number of rows per
|
||||
* **chunk_group_row_limit**: ``<integer>`` - the maximum number of rows per
|
||||
chunk for _newly-inserted_ data. Existing chunks of data will not be
|
||||
changed and may have more rows than this maximum value. The default
|
||||
value is `10000`.
|
||||
|
@ -173,14 +188,10 @@ operations that are supported on row tables but not columnar
|
|||
data to be updated only affects row tables (e.g. ``UPDATE parent SET
|
||||
i = i + 1 WHERE n = 300``).
|
||||
|
||||
Note that Citus Columnar supports `btree` and `hash` indexes (and
|
||||
the constraints requiring them) but does not support `gist`, `gin`,
|
||||
`spgist` and `brin` indexes.
|
||||
For this reason, if some partitions are columnar and if the index is
|
||||
not supported by Citus Columnar, then it's impossible to create indexes
|
||||
on the partitioned (parent) table directly. In that case, you need to
|
||||
create the index on the individual row partitions. Similarly for the
|
||||
constraints that require indexes, e.g.:
|
||||
Because columnar tables do not support indexes, it's impossible to
|
||||
create indexes on the partitioned table if some partitions are
|
||||
columnar. Instead, you must create indexes on the individual row
|
||||
partitions. Similarly for constraints that require indexes, e.g.:
|
||||
|
||||
```sql
|
||||
CREATE INDEX p2_ts_idx ON p2 (ts);
|
||||
|
@ -233,14 +244,16 @@ CREATE TABLE perf_columnar(LIKE perf_row) USING COLUMNAR;
|
|||
## Data
|
||||
|
||||
```sql
|
||||
CREATE OR REPLACE FUNCTION random_words(n INT4) RETURNS TEXT LANGUAGE sql AS $$
|
||||
WITH words(w) AS (
|
||||
SELECT ARRAY['zero','one','two','three','four','five','six','seven','eight','nine','ten']
|
||||
),
|
||||
random (word) AS (
|
||||
SELECT w[(random()*array_length(w, 1))::int] FROM generate_series(1, $1) AS i, words
|
||||
)
|
||||
SELECT string_agg(word, ' ') FROM random;
|
||||
CREATE OR REPLACE FUNCTION random_words(n INT4) RETURNS TEXT LANGUAGE plpython2u AS $$
|
||||
import random
|
||||
t = ''
|
||||
words = ['zero','one','two','three','four','five','six','seven','eight','nine','ten']
|
||||
for i in xrange(0,n):
|
||||
if (i != 0):
|
||||
t += ' '
|
||||
r = random.randint(0,len(words)-1)
|
||||
t += words[r]
|
||||
return t
|
||||
$$;
|
||||
```
|
||||
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
# Columnar extension
|
||||
comment = 'Citus Columnar extension'
|
||||
default_version = '12.2-1'
|
||||
module_pathname = '$libdir/citus_columnar'
|
||||
relocatable = false
|
||||
schema = pg_catalog
|
|
@ -11,20 +11,17 @@
|
|||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "miscadmin.h"
|
||||
|
||||
#include "utils/guc.h"
|
||||
#include "utils/rel.h"
|
||||
|
||||
#include "citus_version.h"
|
||||
|
||||
#include "columnar/columnar.h"
|
||||
#include "columnar/columnar_tableam.h"
|
||||
|
||||
/* Default values for option parameters */
|
||||
#define DEFAULT_STRIPE_ROW_COUNT 150000
|
||||
|
@ -56,14 +53,6 @@ static const struct config_enum_entry columnar_compression_options[] =
|
|||
{ NULL, 0, false }
|
||||
};
|
||||
|
||||
void
|
||||
columnar_init(void)
|
||||
{
|
||||
columnar_init_gucs();
|
||||
columnar_tableam_init();
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
columnar_init_gucs()
|
||||
{
|
||||
|
|
|
@ -13,22 +13,16 @@
|
|||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "citus_version.h"
|
||||
#include "common/pg_lzcompress.h"
|
||||
#include "lib/stringinfo.h"
|
||||
|
||||
#include "citus_version.h"
|
||||
#include "pg_version_constants.h"
|
||||
|
||||
#include "columnar/columnar_compression.h"
|
||||
|
||||
#if HAVE_CITUS_LIBLZ4
|
||||
#include <lz4.h>
|
||||
#endif
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||
#include "varatt.h"
|
||||
#endif
|
||||
|
||||
#if HAVE_LIBZSTD
|
||||
#include <zstd.h>
|
||||
#endif
|
||||
|
|
|
@ -10,13 +10,13 @@
|
|||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
|
||||
#include "pg_config.h"
|
||||
#include "access/nbtree.h"
|
||||
#include "access/table.h"
|
||||
#include "catalog/pg_am.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "distributed/pg_version_constants.h"
|
||||
#include "distributed/tuplestore.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/fd.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/guc.h"
|
||||
|
@ -24,17 +24,12 @@
|
|||
#include "utils/rel.h"
|
||||
#include "utils/tuplestore.h"
|
||||
|
||||
#include "pg_version_compat.h"
|
||||
#include "pg_version_constants.h"
|
||||
|
||||
#include "columnar/columnar.h"
|
||||
#include "columnar/columnar_storage.h"
|
||||
#include "columnar/columnar_version_compat.h"
|
||||
|
||||
static void MemoryContextTotals(MemoryContext context, MemoryContextCounters *counters);
|
||||
|
||||
PG_FUNCTION_INFO_V1(columnar_store_memory_stats);
|
||||
PG_FUNCTION_INFO_V1(columnar_storage_info);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -55,8 +50,6 @@ columnar_store_memory_stats(PG_FUNCTION_ARGS)
|
|||
TupleDescInitEntry(tupleDescriptor, (AttrNumber) 3, "WriteStateContext",
|
||||
INT8OID, -1, 0);
|
||||
|
||||
tupleDescriptor = BlessTupleDesc(tupleDescriptor);
|
||||
|
||||
MemoryContextCounters transactionCounters = { 0 };
|
||||
MemoryContextCounters topCounters = { 0 };
|
||||
MemoryContextCounters writeStateCounters = { 0 };
|
||||
|
@ -71,75 +64,11 @@ columnar_store_memory_stats(PG_FUNCTION_ARGS)
|
|||
Int64GetDatum(writeStateCounters.totalspace)
|
||||
};
|
||||
|
||||
HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls);
|
||||
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
|
||||
tuplestore_putvalues(tupleStore, tupleDescriptor, values, nulls);
|
||||
tuplestore_donestoring(tupleStore);
|
||||
|
||||
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* columnar_storage_info - UDF to return internal storage info for a columnar relation.
|
||||
*
|
||||
* DDL:
|
||||
* CREATE OR REPLACE FUNCTION columnar_storage_info(
|
||||
* rel regclass,
|
||||
* version_major OUT int4,
|
||||
* version_minor OUT int4,
|
||||
* storage_id OUT int8,
|
||||
* reserved_stripe_id OUT int8,
|
||||
* reserved_row_number OUT int8,
|
||||
* reserved_offset OUT int8)
|
||||
* STRICT
|
||||
* LANGUAGE c AS 'MODULE_PATHNAME', 'columnar_storage_info';
|
||||
*/
|
||||
Datum
|
||||
columnar_storage_info(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#define STORAGE_INFO_NATTS 6
|
||||
Oid relid = PG_GETARG_OID(0);
|
||||
TupleDesc tupdesc;
|
||||
|
||||
/* Build a tuple descriptor for our result type */
|
||||
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
||||
{
|
||||
elog(ERROR, "return type must be a row type");
|
||||
}
|
||||
|
||||
if (tupdesc->natts != STORAGE_INFO_NATTS)
|
||||
{
|
||||
elog(ERROR, "return type must have %d columns", STORAGE_INFO_NATTS);
|
||||
}
|
||||
|
||||
Relation rel = table_open(relid, AccessShareLock);
|
||||
if (!IsColumnarTableAmTable(relid))
|
||||
{
|
||||
ereport(ERROR, (errmsg("table \"%s\" is not a columnar table",
|
||||
RelationGetRelationName(rel))));
|
||||
}
|
||||
|
||||
Datum values[STORAGE_INFO_NATTS] = { 0 };
|
||||
bool nulls[STORAGE_INFO_NATTS] = { 0 };
|
||||
|
||||
/*
|
||||
* Pass force = true so that we can inspect metapages that are not the
|
||||
* current version.
|
||||
*
|
||||
* NB: ensure the order and number of attributes correspond to DDL
|
||||
* declaration.
|
||||
*/
|
||||
values[0] = Int32GetDatum(ColumnarStorageGetVersionMajor(rel, true));
|
||||
values[1] = Int32GetDatum(ColumnarStorageGetVersionMinor(rel, true));
|
||||
values[2] = Int64GetDatum(ColumnarStorageGetStorageId(rel, true));
|
||||
values[3] = Int64GetDatum(ColumnarStorageGetReservedStripeId(rel, true));
|
||||
values[4] = Int64GetDatum(ColumnarStorageGetReservedRowNumber(rel, true));
|
||||
values[5] = Int64GetDatum(ColumnarStorageGetReservedOffset(rel, true));
|
||||
|
||||
/* release lock */
|
||||
table_close(rel, AccessShareLock);
|
||||
|
||||
HeapTuple tuple = heap_form_tuple(tupdesc, values, nulls);
|
||||
|
||||
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
|
||||
PG_RETURN_DATUM(0);
|
||||
}
|
||||
|
||||
|
||||
|
@ -161,5 +90,5 @@ MemoryContextTotals(MemoryContext context, MemoryContextCounters *counters)
|
|||
MemoryContextTotals(child, counters);
|
||||
}
|
||||
|
||||
context->methods->stats(context, NULL, NULL, counters, true);
|
||||
context->methods->stats(context, NULL, NULL, counters);
|
||||
}
|
||||
|
|
|
@ -19,31 +19,23 @@
|
|||
#include "safe_lib.h"
|
||||
|
||||
#include "access/nbtree.h"
|
||||
#include "access/xact.h"
|
||||
#include "catalog/pg_am.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "optimizer/clauses.h"
|
||||
#include "optimizer/optimizer.h"
|
||||
#include "optimizer/clauses.h"
|
||||
#include "optimizer/restrictinfo.h"
|
||||
#include "storage/fd.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/rel.h"
|
||||
|
||||
#include "columnar/columnar.h"
|
||||
#include "columnar/columnar_storage.h"
|
||||
#include "columnar/columnar_tableam.h"
|
||||
#include "columnar/columnar_version_compat.h"
|
||||
|
||||
#include "distributed/listutils.h"
|
||||
|
||||
#define UNEXPECTED_STRIPE_READ_ERR_MSG \
|
||||
"attempted to read an unexpected stripe while reading columnar " \
|
||||
"table %s, stripe with id=" UINT64_FORMAT " is not flushed"
|
||||
|
||||
typedef struct ChunkGroupReadState
|
||||
{
|
||||
int64 currentRow;
|
||||
|
@ -70,10 +62,11 @@ typedef struct StripeReadState
|
|||
|
||||
struct ColumnarReadState
|
||||
{
|
||||
List *stripeList;
|
||||
TupleDesc tupleDescriptor;
|
||||
Relation relation;
|
||||
|
||||
StripeMetadata *currentStripeMetadata;
|
||||
int64 currentStripe; /* index of current stripe */
|
||||
StripeReadState *stripeReadState;
|
||||
|
||||
/*
|
||||
|
@ -87,41 +80,18 @@ struct ColumnarReadState
|
|||
|
||||
MemoryContext stripeReadContext;
|
||||
int64 chunkGroupsFiltered;
|
||||
|
||||
/*
|
||||
* Memory context guaranteed to be not freed during scan so we can
|
||||
* safely use for any memory allocations regarding ColumnarReadState
|
||||
* itself.
|
||||
*/
|
||||
MemoryContext scanContext;
|
||||
|
||||
Snapshot snapshot;
|
||||
bool snapshotRegisteredByUs;
|
||||
};
|
||||
|
||||
/* static function declarations */
|
||||
static MemoryContext CreateStripeReadMemoryContext(void);
|
||||
static bool ColumnarReadIsCurrentStripe(ColumnarReadState *readState,
|
||||
uint64 rowNumber);
|
||||
static StripeMetadata * ColumnarReadGetCurrentStripe(ColumnarReadState *readState);
|
||||
static void ReadStripeRowByRowNumber(ColumnarReadState *readState,
|
||||
uint64 rowNumber, Datum *columnValues,
|
||||
bool *columnNulls);
|
||||
static bool StripeReadIsCurrentChunkGroup(StripeReadState *stripeReadState,
|
||||
int chunkGroupIndex);
|
||||
static void ReadChunkGroupRowByRowOffset(ChunkGroupReadState *chunkGroupReadState,
|
||||
StripeMetadata *stripeMetadata,
|
||||
uint64 stripeRowOffset, Datum *columnValues,
|
||||
bool *columnNulls);
|
||||
static bool StripeReadInProgress(ColumnarReadState *readState);
|
||||
static bool HasUnreadStripe(ColumnarReadState *readState);
|
||||
static StripeReadState * BeginStripeRead(StripeMetadata *stripeMetadata, Relation rel,
|
||||
TupleDesc tupleDesc, List *projectedColumnList,
|
||||
List *whereClauseList, List *whereClauseVars,
|
||||
MemoryContext stripeReadContext,
|
||||
Snapshot snapshot);
|
||||
MemoryContext stripeReadContext);
|
||||
static void EndStripeRead(StripeReadState *stripeReadState);
|
||||
static void AdvanceStripeRead(ColumnarReadState *readState);
|
||||
static bool SnapshotMightSeeUnflushedStripes(Snapshot snapshot);
|
||||
static bool ReadStripeNextRow(StripeReadState *stripeReadState, Datum *columnValues,
|
||||
bool *columnNulls);
|
||||
static ChunkGroupReadState * BeginChunkGroupRead(StripeBuffers *stripeBuffers, int
|
||||
|
@ -139,8 +109,7 @@ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation,
|
|||
List *projectedColumnList,
|
||||
List *whereClauseList,
|
||||
List *whereClauseVars,
|
||||
int64 *chunkGroupsFiltered,
|
||||
Snapshot snapshot);
|
||||
int64 *chunkGroupsFiltered);
|
||||
static ColumnBuffers * LoadColumnBuffers(Relation relation,
|
||||
ColumnChunkSkipNode *chunkSkipNodeArray,
|
||||
uint32 chunkCount, uint64 stripeOffset,
|
||||
|
@ -178,10 +147,17 @@ static Datum ColumnDefaultValue(TupleConstr *tupleConstraints,
|
|||
*/
|
||||
ColumnarReadState *
|
||||
ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
|
||||
List *projectedColumnList, List *whereClauseList,
|
||||
MemoryContext scanContext, Snapshot snapshot,
|
||||
bool randomAccess)
|
||||
List *projectedColumnList, List *whereClauseList)
|
||||
{
|
||||
List *stripeList = StripesForRelfilenode(relation->rd_node);
|
||||
StripeMetadata *stripeMetadata = NULL;
|
||||
|
||||
uint64 totalRowCount = 0;
|
||||
foreach_ptr(stripeMetadata, stripeList)
|
||||
{
|
||||
totalRowCount += stripeMetadata->rowCount;
|
||||
}
|
||||
|
||||
/*
|
||||
* We allocate all stripe specific data in the stripeReadContext, and reset
|
||||
* this memory context before loading a new stripe. This is to avoid memory
|
||||
|
@ -191,6 +167,7 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
|
|||
|
||||
ColumnarReadState *readState = palloc0(sizeof(ColumnarReadState));
|
||||
readState->relation = relation;
|
||||
readState->stripeList = stripeList;
|
||||
readState->projectedColumnList = projectedColumnList;
|
||||
readState->whereClauseList = whereClauseList;
|
||||
readState->whereClauseVars = GetClauseVars(whereClauseList, tupleDescriptor->natts);
|
||||
|
@ -198,111 +175,11 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
|
|||
readState->tupleDescriptor = tupleDescriptor;
|
||||
readState->stripeReadContext = stripeReadContext;
|
||||
readState->stripeReadState = NULL;
|
||||
readState->scanContext = scanContext;
|
||||
|
||||
/*
|
||||
* Note that ColumnarReadFlushPendingWrites might update those two by
|
||||
* registering a new snapshot.
|
||||
*/
|
||||
readState->snapshot = snapshot;
|
||||
readState->snapshotRegisteredByUs = false;
|
||||
|
||||
if (!randomAccess)
|
||||
{
|
||||
/*
|
||||
* When doing random access (i.e.: index scan), we don't need to flush
|
||||
* pending writes until we need to read them.
|
||||
* columnar_index_fetch_tuple would do so when needed.
|
||||
*/
|
||||
ColumnarReadFlushPendingWrites(readState);
|
||||
|
||||
/*
|
||||
* AdvanceStripeRead sets currentStripeMetadata for the first stripe
|
||||
* to read if not doing random access. Otherwise, reader (i.e.:
|
||||
* ColumnarReadRowByRowNumber) would already decide the stripe to read
|
||||
* on-the-fly.
|
||||
*
|
||||
* Moreover, since we don't flush pending writes for random access,
|
||||
* AdvanceStripeRead might encounter stripe metadata entries due
|
||||
* to current transaction's pending writes even when using an MVCC
|
||||
* snapshot, but AdvanceStripeRead would throw an error for that.
|
||||
* Note that this is not the case for plain table scan methods
|
||||
* (i.e.: SeqScan and Columnar CustomScan).
|
||||
*
|
||||
* For those reasons, we don't call AdvanceStripeRead if we will do
|
||||
* random access.
|
||||
*/
|
||||
AdvanceStripeRead(readState);
|
||||
}
|
||||
|
||||
return readState;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarReadFlushPendingWrites flushes pending writes for read operation
|
||||
* and sets a new (registered) snapshot if necessary.
|
||||
*
|
||||
* If it sets a new snapshot, then sets snapshotRegisteredByUs to true to
|
||||
* indicate that caller should unregister the snapshot after finishing read
|
||||
* operation.
|
||||
*
|
||||
* Note that this function assumes that readState's relation and snapshot
|
||||
* fields are already set.
|
||||
*/
|
||||
void
|
||||
ColumnarReadFlushPendingWrites(ColumnarReadState *readState)
|
||||
{
|
||||
Assert(!readState->snapshotRegisteredByUs);
|
||||
|
||||
RelFileNumber relfilenumber = RelationPhysicalIdentifierNumber_compat(
|
||||
RelationPhysicalIdentifier_compat(readState->relation));
|
||||
FlushWriteStateForRelfilenumber(relfilenumber, GetCurrentSubTransactionId());
|
||||
|
||||
if (readState->snapshot == InvalidSnapshot || !IsMVCCSnapshot(readState->snapshot))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we flushed any pending writes, then we should guarantee that
|
||||
* those writes are visible to us too. For this reason, if given
|
||||
* snapshot is an MVCC snapshot, then we set its curcid to current
|
||||
* command id.
|
||||
*
|
||||
* For simplicity, we do that even if we didn't flush any writes
|
||||
* since we don't see any problem with that.
|
||||
*
|
||||
* XXX: We should either not update cid if we are executing a FETCH
|
||||
* (from cursor) command, or we should have a better way to deal with
|
||||
* pending writes, see the discussion in
|
||||
* https://github.com/citusdata/citus/issues/5231.
|
||||
*/
|
||||
PushCopiedSnapshot(readState->snapshot);
|
||||
|
||||
/* now our snapshot is the active one */
|
||||
UpdateActiveSnapshotCommandId();
|
||||
Snapshot newSnapshot = GetActiveSnapshot();
|
||||
RegisterSnapshot(newSnapshot);
|
||||
|
||||
/*
|
||||
* To be able to use UpdateActiveSnapshotCommandId, we pushed the
|
||||
* copied snapshot to the stack. However, we don't need to keep it
|
||||
* there since we will anyway rely on ColumnarReadState->snapshot
|
||||
* during read operation.
|
||||
*
|
||||
* Note that since we registered the snapshot already, we guarantee
|
||||
* that PopActiveSnapshot won't free it.
|
||||
*/
|
||||
PopActiveSnapshot();
|
||||
|
||||
readState->snapshot = newSnapshot;
|
||||
|
||||
/* do not forget to unregister it when finishing the read operation */
|
||||
readState->snapshotRegisteredByUs = true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreateStripeReadMemoryContext creates a memory context to be used when
|
||||
* reading a stripe.
|
||||
|
@ -317,12 +194,11 @@ CreateStripeReadMemoryContext()
|
|||
|
||||
/*
|
||||
* ColumnarReadNextRow tries to read a row from the columnar table. On success, it sets
|
||||
* column values, column nulls and rowNumber (if passed to be non-NULL), and returns true.
|
||||
* If there are no more rows to read, the function returns false.
|
||||
* column values and nulls, and returns true. If there are no more rows to read,
|
||||
* the function returns false.
|
||||
*/
|
||||
bool
|
||||
ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *columnNulls,
|
||||
uint64 *rowNumber)
|
||||
ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *columnNulls)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
|
@ -333,14 +209,15 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col
|
|||
return false;
|
||||
}
|
||||
|
||||
readState->stripeReadState = BeginStripeRead(readState->currentStripeMetadata,
|
||||
StripeMetadata *stripeMetadata = list_nth(readState->stripeList,
|
||||
readState->currentStripe);
|
||||
readState->stripeReadState = BeginStripeRead(stripeMetadata,
|
||||
readState->relation,
|
||||
readState->tupleDescriptor,
|
||||
readState->projectedColumnList,
|
||||
readState->whereClauseList,
|
||||
readState->whereClauseVars,
|
||||
readState->stripeReadContext,
|
||||
readState->snapshot);
|
||||
readState->stripeReadContext);
|
||||
}
|
||||
|
||||
if (!ReadStripeNextRow(readState->stripeReadState, columnValues, columnNulls))
|
||||
|
@ -349,12 +226,6 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col
|
|||
continue;
|
||||
}
|
||||
|
||||
if (rowNumber)
|
||||
{
|
||||
*rowNumber = readState->currentStripeMetadata->firstRowNumber +
|
||||
readState->stripeReadState->currentRow - 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -362,202 +233,6 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarReadRowByRowNumberOrError is a wrapper around
|
||||
* ColumnarReadRowByRowNumber that throws an error if tuple
|
||||
* with rowNumber does not exist.
|
||||
*/
|
||||
void
|
||||
ColumnarReadRowByRowNumberOrError(ColumnarReadState *readState,
|
||||
uint64 rowNumber, Datum *columnValues,
|
||||
bool *columnNulls)
|
||||
{
|
||||
if (!ColumnarReadRowByRowNumber(readState, rowNumber,
|
||||
columnValues, columnNulls))
|
||||
{
|
||||
ereport(ERROR, (errmsg("cannot read from columnar table %s, tuple with "
|
||||
"row number " UINT64_FORMAT " does not exist",
|
||||
RelationGetRelationName(readState->relation),
|
||||
rowNumber)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarReadRowByRowNumber reads row with rowNumber from given relation
|
||||
* into columnValues and columnNulls, and returns true. If no such row
|
||||
* exists, then returns false.
|
||||
*/
|
||||
bool
|
||||
ColumnarReadRowByRowNumber(ColumnarReadState *readState,
|
||||
uint64 rowNumber, Datum *columnValues,
|
||||
bool *columnNulls)
|
||||
{
|
||||
if (!ColumnarReadIsCurrentStripe(readState, rowNumber))
|
||||
{
|
||||
Relation columnarRelation = readState->relation;
|
||||
Snapshot snapshot = readState->snapshot;
|
||||
StripeMetadata *stripeMetadata = FindStripeByRowNumber(columnarRelation,
|
||||
rowNumber, snapshot);
|
||||
if (stripeMetadata == NULL)
|
||||
{
|
||||
/* no such row exists */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (StripeWriteState(stripeMetadata) != STRIPE_WRITE_FLUSHED)
|
||||
{
|
||||
/*
|
||||
* Callers are expected to skip stripes that are not flushed to
|
||||
* disk yet or should wait for the writer xact to commit or abort,
|
||||
* but let's be on the safe side.
|
||||
*/
|
||||
ereport(ERROR, (errmsg(UNEXPECTED_STRIPE_READ_ERR_MSG,
|
||||
RelationGetRelationName(columnarRelation),
|
||||
stripeMetadata->id)));
|
||||
}
|
||||
|
||||
/* do the cleanup before reading a new stripe */
|
||||
ColumnarResetRead(readState);
|
||||
|
||||
TupleDesc relationTupleDesc = RelationGetDescr(columnarRelation);
|
||||
List *whereClauseList = NIL;
|
||||
List *whereClauseVars = NIL;
|
||||
MemoryContext stripeReadContext = readState->stripeReadContext;
|
||||
readState->stripeReadState = BeginStripeRead(stripeMetadata,
|
||||
columnarRelation,
|
||||
relationTupleDesc,
|
||||
readState->projectedColumnList,
|
||||
whereClauseList,
|
||||
whereClauseVars,
|
||||
stripeReadContext,
|
||||
snapshot);
|
||||
|
||||
readState->currentStripeMetadata = stripeMetadata;
|
||||
}
|
||||
|
||||
ReadStripeRowByRowNumber(readState, rowNumber, columnValues, columnNulls);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarReadIsCurrentStripe returns true if stripe being read contains
|
||||
* row with given rowNumber.
|
||||
*/
|
||||
static bool
|
||||
ColumnarReadIsCurrentStripe(ColumnarReadState *readState, uint64 rowNumber)
|
||||
{
|
||||
if (!StripeReadInProgress(readState))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
StripeMetadata *currentStripeMetadata = readState->currentStripeMetadata;
|
||||
if (rowNumber >= currentStripeMetadata->firstRowNumber &&
|
||||
rowNumber <= StripeGetHighestRowNumber(currentStripeMetadata))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarReadGetCurrentStripe returns StripeMetadata for the stripe that is
|
||||
* being read.
|
||||
*/
|
||||
static StripeMetadata *
|
||||
ColumnarReadGetCurrentStripe(ColumnarReadState *readState)
|
||||
{
|
||||
return readState->currentStripeMetadata;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReadStripeRowByRowNumber reads row with rowNumber from given
|
||||
* stripeReadState into columnValues and columnNulls.
|
||||
* Errors out if no such row exists in the stripe being read.
|
||||
*/
|
||||
static void
|
||||
ReadStripeRowByRowNumber(ColumnarReadState *readState,
|
||||
uint64 rowNumber, Datum *columnValues,
|
||||
bool *columnNulls)
|
||||
{
|
||||
StripeMetadata *stripeMetadata = ColumnarReadGetCurrentStripe(readState);
|
||||
StripeReadState *stripeReadState = readState->stripeReadState;
|
||||
|
||||
if (rowNumber < stripeMetadata->firstRowNumber)
|
||||
{
|
||||
/* not expected but be on the safe side */
|
||||
ereport(ERROR, (errmsg("row offset cannot be negative")));
|
||||
}
|
||||
|
||||
/* find the exact chunk group to be read */
|
||||
uint64 stripeRowOffset = rowNumber - stripeMetadata->firstRowNumber;
|
||||
int chunkGroupIndex = stripeRowOffset / stripeMetadata->chunkGroupRowCount;
|
||||
if (!StripeReadIsCurrentChunkGroup(stripeReadState, chunkGroupIndex))
|
||||
{
|
||||
if (stripeReadState->chunkGroupReadState)
|
||||
{
|
||||
EndChunkGroupRead(stripeReadState->chunkGroupReadState);
|
||||
}
|
||||
|
||||
stripeReadState->chunkGroupIndex = chunkGroupIndex;
|
||||
stripeReadState->chunkGroupReadState = BeginChunkGroupRead(
|
||||
stripeReadState->stripeBuffers,
|
||||
stripeReadState->chunkGroupIndex,
|
||||
stripeReadState->tupleDescriptor,
|
||||
stripeReadState->projectedColumnList,
|
||||
stripeReadState->stripeReadContext);
|
||||
}
|
||||
|
||||
ReadChunkGroupRowByRowOffset(stripeReadState->chunkGroupReadState,
|
||||
stripeMetadata, stripeRowOffset,
|
||||
columnValues, columnNulls);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* StripeReadIsCurrentChunkGroup returns true if chunk group being read is
|
||||
* the one with the given chunkGroupIndex in its stripe.
|
||||
*/
|
||||
static bool
|
||||
StripeReadIsCurrentChunkGroup(StripeReadState *stripeReadState, int chunkGroupIndex)
|
||||
{
|
||||
if (!stripeReadState->chunkGroupReadState)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return (stripeReadState->chunkGroupIndex == chunkGroupIndex);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReadChunkGroupRowByRowOffset reads row with stripeRowOffset from given
|
||||
* chunkGroupReadState into columnValues and columnNulls.
|
||||
* Errors out if no such row exists in the chunk group being read.
|
||||
*/
|
||||
static void
|
||||
ReadChunkGroupRowByRowOffset(ChunkGroupReadState *chunkGroupReadState,
|
||||
StripeMetadata *stripeMetadata,
|
||||
uint64 stripeRowOffset, Datum *columnValues,
|
||||
bool *columnNulls)
|
||||
{
|
||||
/* set the exact row number to be read from given chunk group */
|
||||
chunkGroupReadState->currentRow = stripeRowOffset %
|
||||
stripeMetadata->chunkGroupRowCount;
|
||||
if (!ReadChunkGroupNextRow(chunkGroupReadState, columnValues, columnNulls))
|
||||
{
|
||||
/* not expected but be on the safe side */
|
||||
ereport(ERROR, (errmsg("could not find the row in stripe")));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* StripeReadInProgress returns true if we already started reading a stripe.
|
||||
*/
|
||||
|
@ -575,7 +250,8 @@ StripeReadInProgress(ColumnarReadState *readState)
|
|||
static bool
|
||||
HasUnreadStripe(ColumnarReadState *readState)
|
||||
{
|
||||
return readState->currentStripeMetadata != NULL;
|
||||
uint32 stripeCount = list_length(readState->stripeList);
|
||||
return readState->currentStripe < stripeCount;
|
||||
}
|
||||
|
||||
|
||||
|
@ -584,19 +260,11 @@ HasUnreadStripe(ColumnarReadState *readState)
|
|||
* the beginning again
|
||||
*/
|
||||
void
|
||||
ColumnarRescan(ColumnarReadState *readState, List *scanQual)
|
||||
ColumnarRescan(ColumnarReadState *readState)
|
||||
{
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(readState->scanContext);
|
||||
|
||||
ColumnarResetRead(readState);
|
||||
|
||||
/* set currentStripeMetadata for the first stripe to read */
|
||||
AdvanceStripeRead(readState);
|
||||
|
||||
readState->stripeReadState = NULL;
|
||||
readState->currentStripe = 0;
|
||||
readState->chunkGroupsFiltered = 0;
|
||||
|
||||
readState->whereClauseList = copyObject(scanQual);
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
}
|
||||
|
||||
|
||||
|
@ -606,50 +274,19 @@ ColumnarRescan(ColumnarReadState *readState, List *scanQual)
|
|||
void
|
||||
ColumnarEndRead(ColumnarReadState *readState)
|
||||
{
|
||||
if (readState->snapshotRegisteredByUs)
|
||||
{
|
||||
/*
|
||||
* init_columnar_read_state created a new snapshot and registered it,
|
||||
* so now forget it.
|
||||
*/
|
||||
UnregisterSnapshot(readState->snapshot);
|
||||
}
|
||||
|
||||
MemoryContextDelete(readState->stripeReadContext);
|
||||
if (readState->currentStripeMetadata)
|
||||
{
|
||||
pfree(readState->currentStripeMetadata);
|
||||
}
|
||||
|
||||
list_free_deep(readState->stripeList);
|
||||
pfree(readState);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarResetRead resets the stripe and the chunk group that is
|
||||
* being read currently (if any).
|
||||
*/
|
||||
void
|
||||
ColumnarResetRead(ColumnarReadState *readState)
|
||||
{
|
||||
if (StripeReadInProgress(readState))
|
||||
{
|
||||
pfree(readState->currentStripeMetadata);
|
||||
readState->currentStripeMetadata = NULL;
|
||||
|
||||
readState->stripeReadState = NULL;
|
||||
MemoryContextReset(readState->stripeReadContext);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* BeginStripeRead allocates state for reading a stripe.
|
||||
*/
|
||||
static StripeReadState *
|
||||
BeginStripeRead(StripeMetadata *stripeMetadata, Relation rel, TupleDesc tupleDesc,
|
||||
List *projectedColumnList, List *whereClauseList, List *whereClauseVars,
|
||||
MemoryContext stripeReadContext, Snapshot snapshot)
|
||||
MemoryContext stripeReadContext)
|
||||
{
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(stripeReadContext);
|
||||
|
||||
|
@ -669,8 +306,7 @@ BeginStripeRead(StripeMetadata *stripeMetadata, Relation rel, TupleDesc tupleDes
|
|||
whereClauseList,
|
||||
whereClauseVars,
|
||||
&stripeReadState->
|
||||
chunkGroupsFiltered,
|
||||
snapshot);
|
||||
chunkGroupsFiltered);
|
||||
|
||||
stripeReadState->rowCount = stripeReadState->stripeBuffers->rowCount;
|
||||
|
||||
|
@ -682,84 +318,29 @@ BeginStripeRead(StripeMetadata *stripeMetadata, Relation rel, TupleDesc tupleDes
|
|||
|
||||
|
||||
/*
|
||||
* AdvanceStripeRead updates chunkGroupsFiltered and sets
|
||||
* currentStripeMetadata for next stripe read.
|
||||
* EndStripeRead finishes a stripe read.
|
||||
*/
|
||||
static void
|
||||
AdvanceStripeRead(ColumnarReadState *readState)
|
||||
EndStripeRead(StripeReadState *stripeReadState)
|
||||
{
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(readState->scanContext);
|
||||
|
||||
/* if not read any stripes yet, start from the first one .. */
|
||||
uint64 lastReadRowNumber = COLUMNAR_INVALID_ROW_NUMBER;
|
||||
if (StripeReadInProgress(readState))
|
||||
{
|
||||
/* .. otherwise, continue with the next stripe */
|
||||
lastReadRowNumber = StripeGetHighestRowNumber(readState->currentStripeMetadata);
|
||||
|
||||
readState->chunkGroupsFiltered +=
|
||||
readState->stripeReadState->chunkGroupsFiltered;
|
||||
}
|
||||
|
||||
readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation,
|
||||
lastReadRowNumber,
|
||||
readState->snapshot);
|
||||
|
||||
if (readState->currentStripeMetadata &&
|
||||
StripeWriteState(readState->currentStripeMetadata) != STRIPE_WRITE_FLUSHED &&
|
||||
!SnapshotMightSeeUnflushedStripes(readState->snapshot))
|
||||
{
|
||||
/*
|
||||
* To be on the safe side, error out if we don't expect to encounter
|
||||
* with an un-flushed stripe. Otherwise, we will skip such stripes
|
||||
* until finding a flushed one.
|
||||
*/
|
||||
ereport(ERROR, (errmsg(UNEXPECTED_STRIPE_READ_ERR_MSG,
|
||||
RelationGetRelationName(readState->relation),
|
||||
readState->currentStripeMetadata->id)));
|
||||
}
|
||||
|
||||
while (readState->currentStripeMetadata &&
|
||||
StripeWriteState(readState->currentStripeMetadata) != STRIPE_WRITE_FLUSHED)
|
||||
{
|
||||
readState->currentStripeMetadata =
|
||||
FindNextStripeByRowNumber(readState->relation,
|
||||
readState->currentStripeMetadata->firstRowNumber,
|
||||
readState->snapshot);
|
||||
}
|
||||
|
||||
readState->stripeReadState = NULL;
|
||||
MemoryContextReset(readState->stripeReadContext);
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
pfree(stripeReadState);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SnapshotMightSeeUnflushedStripes returns true if given snapshot is
|
||||
* expected to see un-flushed stripes either because of other backends'
|
||||
* pending writes or aborted transactions.
|
||||
* AdvanceStripeRead updates chunkGroupsFiltered and increments currentStripe
|
||||
* for next stripe read.
|
||||
*/
|
||||
static bool
|
||||
SnapshotMightSeeUnflushedStripes(Snapshot snapshot)
|
||||
static void
|
||||
AdvanceStripeRead(ColumnarReadState *readState)
|
||||
{
|
||||
if (snapshot == InvalidSnapshot)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
readState->chunkGroupsFiltered +=
|
||||
readState->stripeReadState->chunkGroupsFiltered;
|
||||
EndStripeRead(readState->stripeReadState);
|
||||
|
||||
switch (snapshot->snapshot_type)
|
||||
{
|
||||
case SNAPSHOT_ANY:
|
||||
case SNAPSHOT_DIRTY:
|
||||
case SNAPSHOT_NON_VACUUMABLE:
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
readState->currentStripe++;
|
||||
readState->stripeReadState = NULL;
|
||||
MemoryContextReset(readState->stripeReadContext);
|
||||
}
|
||||
|
||||
|
||||
|
@ -880,7 +461,7 @@ ReadChunkGroupNextRow(ChunkGroupReadState *chunkGroupReadState, Datum *columnVal
|
|||
memset(columnNulls, true, sizeof(bool) * chunkGroupReadState->columnCount);
|
||||
|
||||
int attno;
|
||||
foreach_declared_int(attno, chunkGroupReadState->projectedColumnList)
|
||||
foreach_int(attno, chunkGroupReadState->projectedColumnList)
|
||||
{
|
||||
const ChunkData *chunkGroupData = chunkGroupReadState->chunkGroupData;
|
||||
const int rowIndex = chunkGroupReadState->currentRow;
|
||||
|
@ -986,8 +567,7 @@ ColumnarTableRowCount(Relation relation)
|
|||
{
|
||||
ListCell *stripeMetadataCell = NULL;
|
||||
uint64 totalRowCount = 0;
|
||||
List *stripeList = StripesForRelfilelocator(RelationPhysicalIdentifier_compat(
|
||||
relation));
|
||||
List *stripeList = StripesForRelfilenode(relation->rd_node);
|
||||
|
||||
foreach(stripeMetadataCell, stripeList)
|
||||
{
|
||||
|
@ -1008,19 +588,17 @@ static StripeBuffers *
|
|||
LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
|
||||
TupleDesc tupleDescriptor, List *projectedColumnList,
|
||||
List *whereClauseList, List *whereClauseVars,
|
||||
int64 *chunkGroupsFiltered, Snapshot snapshot)
|
||||
int64 *chunkGroupsFiltered)
|
||||
{
|
||||
uint32 columnIndex = 0;
|
||||
uint32 columnCount = tupleDescriptor->natts;
|
||||
|
||||
bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList);
|
||||
|
||||
StripeSkipList *stripeSkipList = ReadStripeSkipList(RelationPhysicalIdentifier_compat(
|
||||
relation),
|
||||
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node,
|
||||
stripeMetadata->id,
|
||||
tupleDescriptor,
|
||||
stripeMetadata->chunkCount,
|
||||
snapshot);
|
||||
stripeMetadata->chunkCount);
|
||||
|
||||
bool *selectedChunkMask = SelectedChunkMask(stripeSkipList, whereClauseList,
|
||||
whereClauseVars, chunkGroupsFiltered);
|
||||
|
@ -1089,11 +667,7 @@ LoadColumnBuffers(Relation relation, ColumnChunkSkipNode *chunkSkipNodeArray,
|
|||
{
|
||||
ColumnChunkSkipNode *chunkSkipNode = &chunkSkipNodeArray[chunkIndex];
|
||||
uint64 existsOffset = stripeOffset + chunkSkipNode->existsChunkOffset;
|
||||
StringInfo rawExistsBuffer = makeStringInfo();
|
||||
|
||||
enlargeStringInfo(rawExistsBuffer, chunkSkipNode->existsLength);
|
||||
rawExistsBuffer->len = chunkSkipNode->existsLength;
|
||||
ColumnarStorageRead(relation, existsOffset, rawExistsBuffer->data,
|
||||
StringInfo rawExistsBuffer = ReadFromSmgr(relation, existsOffset,
|
||||
chunkSkipNode->existsLength);
|
||||
|
||||
chunkBuffersArray[chunkIndex]->existsBuffer = rawExistsBuffer;
|
||||
|
@ -1105,11 +679,7 @@ LoadColumnBuffers(Relation relation, ColumnChunkSkipNode *chunkSkipNodeArray,
|
|||
ColumnChunkSkipNode *chunkSkipNode = &chunkSkipNodeArray[chunkIndex];
|
||||
CompressionType compressionType = chunkSkipNode->valueCompressionType;
|
||||
uint64 valueOffset = stripeOffset + chunkSkipNode->valueChunkOffset;
|
||||
StringInfo rawValueBuffer = makeStringInfo();
|
||||
|
||||
enlargeStringInfo(rawValueBuffer, chunkSkipNode->valueLength);
|
||||
rawValueBuffer->len = chunkSkipNode->valueLength;
|
||||
ColumnarStorageRead(relation, valueOffset, rawValueBuffer->data,
|
||||
StringInfo rawValueBuffer = ReadFromSmgr(relation, valueOffset,
|
||||
chunkSkipNode->valueLength);
|
||||
|
||||
chunkBuffersArray[chunkIndex]->valueBuffer = rawValueBuffer;
|
||||
|
@ -1489,7 +1059,7 @@ ProjectedColumnMask(uint32 columnCount, List *projectedColumnList)
|
|||
bool *projectedColumnMask = palloc0(columnCount * sizeof(bool));
|
||||
int attno;
|
||||
|
||||
foreach_declared_int(attno, projectedColumnList)
|
||||
foreach_int(attno, projectedColumnList)
|
||||
{
|
||||
/* attno is 1-indexed; projectedColumnMask is 0-indexed */
|
||||
int columnIndex = attno - 1;
|
||||
|
@ -1561,7 +1131,7 @@ DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCou
|
|||
datumTypeLength);
|
||||
currentDatumDataOffset = att_addlength_datum(currentDatumDataOffset,
|
||||
datumTypeLength,
|
||||
datumArray[datumIndex]);
|
||||
currentDatumDataPointer);
|
||||
currentDatumDataOffset = att_align_nominal(currentDatumDataOffset,
|
||||
datumTypeAlign);
|
||||
|
||||
|
@ -1613,6 +1183,13 @@ DeserializeChunkData(StripeBuffers *stripeBuffers, uint64 chunkIndex,
|
|||
chunkBuffers->valueCompressionType,
|
||||
chunkBuffers->decompressedValueSize);
|
||||
|
||||
if (chunkBuffers->valueCompressionType != COMPRESSION_NONE)
|
||||
{
|
||||
/* compressed data is not needed anymore */
|
||||
pfree(chunkBuffers->valueBuffer->data);
|
||||
pfree(chunkBuffers->valueBuffer);
|
||||
}
|
||||
|
||||
DeserializeBoolArray(chunkBuffers->existsBuffer,
|
||||
chunkData->existsArray[columnIndex],
|
||||
rowCount);
|
||||
|
@ -1692,3 +1269,30 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor
|
|||
"does not evaluate to constant value")));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
StringInfo
|
||||
ReadFromSmgr(Relation rel, uint64 offset, uint32 size)
|
||||
{
|
||||
StringInfo resultBuffer = makeStringInfo();
|
||||
uint64 read = 0;
|
||||
|
||||
enlargeStringInfo(resultBuffer, size);
|
||||
resultBuffer->len = size;
|
||||
|
||||
while (read < size)
|
||||
{
|
||||
SmgrAddr addr = logical_to_smgr(offset + read);
|
||||
|
||||
Buffer buffer = ReadBuffer(rel, addr.blockno);
|
||||
Page page = BufferGetPage(buffer);
|
||||
PageHeader phdr = (PageHeader) page;
|
||||
|
||||
uint32 to_read = Min(size - read, phdr->pd_upper - addr.offset);
|
||||
memcpy_s(resultBuffer->data + read, size - read, page + addr.offset, to_read);
|
||||
ReleaseBuffer(buffer);
|
||||
read += to_read;
|
||||
}
|
||||
|
||||
return resultBuffer;
|
||||
}
|
||||
|
|
|
@ -1,866 +0,0 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* columnar_storage.c
|
||||
*
|
||||
* Copyright (c) Citus Data, Inc.
|
||||
*
|
||||
* Low-level storage layer for columnar.
|
||||
* - Translates columnar read/write operations on logical offsets into operations on pages/blocks.
|
||||
* - Emits WAL.
|
||||
* - Reads/writes the columnar metapage.
|
||||
* - Reserves data offsets, stripe numbers, and row offsets.
|
||||
* - Truncation.
|
||||
*
|
||||
* Higher-level columnar operations deal with logical offsets and large
|
||||
* contiguous buffers of data that need to be stored. But the buffer manager
|
||||
* and WAL depend on formatted pages with headers, so these large buffers need
|
||||
* to be written across many pages. This module translates the contiguous
|
||||
* buffers into individual block reads/writes, and performs WAL when
|
||||
* necessary.
|
||||
*
|
||||
* Storage layout: a metapage in block 0, followed by an empty page in block
|
||||
* 1, followed by logical data starting at the first byte after the page
|
||||
* header in block 2 (having logical offset ColumnarFirstLogicalOffset). (XXX:
|
||||
* Block 1 is left empty for no particular reason. Reconsider?). A columnar
|
||||
* table should always have at least 2 blocks.
|
||||
*
|
||||
* Reservation is done with a relation extension lock, and designed for
|
||||
* concurrency, so the callers only need an ordinary lock on the
|
||||
* relation. Initializing the metapage or truncating the relation require that
|
||||
* the caller holds an AccessExclusiveLock. (XXX: New reservations of data are
|
||||
* aligned onto a new page for no particular reason. Reconsider?).
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "miscadmin.h"
|
||||
#include "safe_lib.h"
|
||||
|
||||
#include "access/generic_xlog.h"
|
||||
#include "catalog/storage.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/lmgr.h"
|
||||
|
||||
#include "pg_version_compat.h"
|
||||
|
||||
#include "columnar/columnar.h"
|
||||
#include "columnar/columnar_storage.h"
|
||||
|
||||
|
||||
/*
|
||||
* Content of the first page in main fork, which stores metadata at file
|
||||
* level.
|
||||
*/
|
||||
typedef struct ColumnarMetapage
|
||||
{
|
||||
/*
|
||||
* Store version of file format used, so we can detect files from
|
||||
* previous versions if we change file format.
|
||||
*/
|
||||
uint32 versionMajor;
|
||||
uint32 versionMinor;
|
||||
|
||||
/*
|
||||
* Each of the metadata table rows are identified by a storageId.
|
||||
* We store it also in the main fork so we can link metadata rows
|
||||
* with data files.
|
||||
*/
|
||||
uint64 storageId;
|
||||
|
||||
uint64 reservedStripeId; /* first unused stripe id */
|
||||
uint64 reservedRowNumber; /* first unused row number */
|
||||
uint64 reservedOffset; /* first unused byte offset */
|
||||
|
||||
/*
|
||||
* Flag set to true in the init fork. After an unlogged table reset (due
|
||||
* to a crash), the init fork will be copied over the main fork. When
|
||||
* trying to read an unlogged table, if this flag is set to true, we must
|
||||
* clear the metadata for the table (because the actual data is gone,
|
||||
* too), and clear the flag. We can cross-check that the table is
|
||||
* UNLOGGED, and that the main fork is at the minimum size (no actual
|
||||
* data).
|
||||
*
|
||||
* XXX: Not used yet; reserved field for later support for UNLOGGED.
|
||||
*/
|
||||
bool unloggedReset;
|
||||
} ColumnarMetapage;
|
||||
|
||||
|
||||
/* represents a "physical" block+offset address */
|
||||
typedef struct PhysicalAddr
|
||||
{
|
||||
BlockNumber blockno;
|
||||
uint32 offset;
|
||||
} PhysicalAddr;
|
||||
|
||||
|
||||
#define COLUMNAR_METAPAGE_BLOCKNO 0
|
||||
#define COLUMNAR_EMPTY_BLOCKNO 1
|
||||
#define COLUMNAR_INVALID_STRIPE_ID 0
|
||||
#define COLUMNAR_FIRST_STRIPE_ID 1
|
||||
|
||||
|
||||
#define OLD_METAPAGE_VERSION_HINT "Use \"VACUUM\" to upgrade the columnar table format " \
|
||||
"version or run \"ALTER EXTENSION citus UPDATE\"."
|
||||
|
||||
|
||||
/* only for testing purposes */
|
||||
PG_FUNCTION_INFO_V1(test_columnar_storage_write_new_page);
|
||||
|
||||
|
||||
/*
|
||||
* Map logical offsets to a physical page and offset where the data is kept.
|
||||
*/
|
||||
static inline PhysicalAddr
|
||||
LogicalToPhysical(uint64 logicalOffset)
|
||||
{
|
||||
PhysicalAddr addr;
|
||||
|
||||
addr.blockno = logicalOffset / COLUMNAR_BYTES_PER_PAGE;
|
||||
addr.offset = SizeOfPageHeaderData + (logicalOffset % COLUMNAR_BYTES_PER_PAGE);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Map a physical page and offset address to a logical address.
|
||||
*/
|
||||
static inline uint64
|
||||
PhysicalToLogical(PhysicalAddr addr)
|
||||
{
|
||||
return COLUMNAR_BYTES_PER_PAGE * addr.blockno + addr.offset - SizeOfPageHeaderData;
|
||||
}
|
||||
|
||||
|
||||
static void ColumnarOverwriteMetapage(Relation relation,
|
||||
ColumnarMetapage columnarMetapage);
|
||||
static ColumnarMetapage ColumnarMetapageRead(Relation rel, bool force);
|
||||
static void ReadFromBlock(Relation rel, BlockNumber blockno, uint32 offset,
|
||||
char *buf, uint32 len, bool force);
|
||||
static void WriteToBlock(Relation rel, BlockNumber blockno, uint32 offset,
|
||||
char *buf, uint32 len, bool clear);
|
||||
static uint64 AlignReservation(uint64 prevReservation);
|
||||
static bool ColumnarMetapageIsCurrent(ColumnarMetapage *metapage);
|
||||
static bool ColumnarMetapageIsOlder(ColumnarMetapage *metapage);
|
||||
static bool ColumnarMetapageIsNewer(ColumnarMetapage *metapage);
|
||||
static void ColumnarMetapageCheckVersion(Relation rel, ColumnarMetapage *metapage);
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageInit - initialize a new metapage in an empty relation
|
||||
* with the given storageId.
|
||||
*
|
||||
* Caller must hold AccessExclusiveLock on the relation.
|
||||
*/
|
||||
void
|
||||
ColumnarStorageInit(SMgrRelation srel, uint64 storageId)
|
||||
{
|
||||
BlockNumber nblocks = smgrnblocks(srel, MAIN_FORKNUM);
|
||||
|
||||
if (nblocks > 0)
|
||||
{
|
||||
elog(ERROR,
|
||||
"attempted to initialize metapage, but %d pages already exist",
|
||||
nblocks);
|
||||
}
|
||||
|
||||
/* create two pages */
|
||||
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||
PGIOAlignedBlock block;
|
||||
#else
|
||||
PGAlignedBlock block;
|
||||
#endif
|
||||
Page page = block.data;
|
||||
|
||||
/* write metapage */
|
||||
PageInit(page, BLCKSZ, 0);
|
||||
PageHeader phdr = (PageHeader) page;
|
||||
|
||||
ColumnarMetapage metapage = { 0 };
|
||||
metapage.storageId = storageId;
|
||||
metapage.versionMajor = COLUMNAR_VERSION_MAJOR;
|
||||
metapage.versionMinor = COLUMNAR_VERSION_MINOR;
|
||||
metapage.reservedStripeId = COLUMNAR_FIRST_STRIPE_ID;
|
||||
metapage.reservedRowNumber = COLUMNAR_FIRST_ROW_NUMBER;
|
||||
metapage.reservedOffset = ColumnarFirstLogicalOffset;
|
||||
metapage.unloggedReset = false;
|
||||
memcpy_s(page + phdr->pd_lower, phdr->pd_upper - phdr->pd_lower,
|
||||
(char *) &metapage, sizeof(ColumnarMetapage));
|
||||
phdr->pd_lower += sizeof(ColumnarMetapage);
|
||||
|
||||
log_newpage(RelationPhysicalIdentifierBackend_compat(&srel), MAIN_FORKNUM,
|
||||
COLUMNAR_METAPAGE_BLOCKNO, page, true);
|
||||
PageSetChecksumInplace(page, COLUMNAR_METAPAGE_BLOCKNO);
|
||||
smgrextend(srel, MAIN_FORKNUM, COLUMNAR_METAPAGE_BLOCKNO, page, true);
|
||||
|
||||
/* write empty page */
|
||||
PageInit(page, BLCKSZ, 0);
|
||||
|
||||
log_newpage(RelationPhysicalIdentifierBackend_compat(&srel), MAIN_FORKNUM,
|
||||
COLUMNAR_EMPTY_BLOCKNO, page, true);
|
||||
PageSetChecksumInplace(page, COLUMNAR_EMPTY_BLOCKNO);
|
||||
smgrextend(srel, MAIN_FORKNUM, COLUMNAR_EMPTY_BLOCKNO, page, true);
|
||||
|
||||
/*
|
||||
* An immediate sync is required even if we xlog'd the page, because the
|
||||
* write did not go through shared_buffers and therefore a concurrent
|
||||
* checkpoint may have moved the redo pointer past our xlog record.
|
||||
*/
|
||||
smgrimmedsync(srel, MAIN_FORKNUM);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageUpdateCurrent - update the metapage to the current
|
||||
* version. No effect if the version already matches. If 'upgrade' is true,
|
||||
* throw an error if metapage version is newer; if 'upgrade' is false, it's a
|
||||
* downgrade, so throw an error if the metapage version is older.
|
||||
*
|
||||
* NB: caller must ensure that metapage already exists, which might not be the
|
||||
* case on 10.0.
|
||||
*/
|
||||
void
|
||||
ColumnarStorageUpdateCurrent(Relation rel, bool upgrade, uint64 reservedStripeId,
|
||||
uint64 reservedRowNumber, uint64 reservedOffset)
|
||||
{
|
||||
LockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, true);
|
||||
|
||||
if (ColumnarMetapageIsCurrent(&metapage))
|
||||
{
|
||||
/* nothing to do */
|
||||
return;
|
||||
}
|
||||
|
||||
if (upgrade && ColumnarMetapageIsNewer(&metapage))
|
||||
{
|
||||
elog(ERROR, "found newer columnar metapage while upgrading");
|
||||
}
|
||||
|
||||
if (!upgrade && ColumnarMetapageIsOlder(&metapage))
|
||||
{
|
||||
elog(ERROR, "found older columnar metapage while downgrading");
|
||||
}
|
||||
|
||||
metapage.versionMajor = COLUMNAR_VERSION_MAJOR;
|
||||
metapage.versionMinor = COLUMNAR_VERSION_MINOR;
|
||||
|
||||
/* storageId remains the same */
|
||||
metapage.reservedStripeId = reservedStripeId;
|
||||
metapage.reservedRowNumber = reservedRowNumber;
|
||||
metapage.reservedOffset = reservedOffset;
|
||||
|
||||
ColumnarOverwriteMetapage(rel, metapage);
|
||||
|
||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageGetVersionMajor - return major version from the metapage.
|
||||
*
|
||||
* Throw an error if the metapage is not the current version, unless
|
||||
* 'force' is true.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageGetVersionMajor(Relation rel, bool force)
|
||||
{
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, force);
|
||||
|
||||
return metapage.versionMajor;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageGetVersionMinor - return minor version from the metapage.
|
||||
*
|
||||
* Throw an error if the metapage is not the current version, unless
|
||||
* 'force' is true.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageGetVersionMinor(Relation rel, bool force)
|
||||
{
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, force);
|
||||
|
||||
return metapage.versionMinor;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageGetStorageId - return storage ID from the metapage.
|
||||
*
|
||||
* Throw an error if the metapage is not the current version, unless
|
||||
* 'force' is true.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageGetStorageId(Relation rel, bool force)
|
||||
{
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, force);
|
||||
|
||||
return metapage.storageId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageGetReservedStripeId - return reserved stripe ID from the
|
||||
* metapage.
|
||||
*
|
||||
* Throw an error if the metapage is not the current version, unless
|
||||
* 'force' is true.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageGetReservedStripeId(Relation rel, bool force)
|
||||
{
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, force);
|
||||
|
||||
return metapage.reservedStripeId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageGetReservedRowNumber - return reserved row number from the
|
||||
* metapage.
|
||||
*
|
||||
* Throw an error if the metapage is not the current version, unless
|
||||
* 'force' is true.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageGetReservedRowNumber(Relation rel, bool force)
|
||||
{
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, force);
|
||||
|
||||
return metapage.reservedRowNumber;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageGetReservedOffset - return reserved offset from the metapage.
|
||||
*
|
||||
* Throw an error if the metapage is not the current version, unless
|
||||
* 'force' is true.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageGetReservedOffset(Relation rel, bool force)
|
||||
{
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, force);
|
||||
|
||||
return metapage.reservedOffset;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageIsCurrent - return true if metapage exists and is
|
||||
* the current version.
|
||||
*/
|
||||
bool
|
||||
ColumnarStorageIsCurrent(Relation rel)
|
||||
{
|
||||
BlockNumber nblocks = smgrnblocks(RelationGetSmgr(rel), MAIN_FORKNUM);
|
||||
|
||||
if (nblocks < 2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, true);
|
||||
return ColumnarMetapageIsCurrent(&metapage);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageReserveRowNumber returns reservedRowNumber and advances
|
||||
* it for next row number reservation.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageReserveRowNumber(Relation rel, uint64 nrows)
|
||||
{
|
||||
LockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, false);
|
||||
|
||||
uint64 firstRowNumber = metapage.reservedRowNumber;
|
||||
metapage.reservedRowNumber += nrows;
|
||||
|
||||
ColumnarOverwriteMetapage(rel, metapage);
|
||||
|
||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
return firstRowNumber;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageReserveStripeId returns stripeId and advances it for next
|
||||
* stripeId reservation.
|
||||
* Note that this function doesn't handle row number reservation.
|
||||
* See ColumnarStorageReserveRowNumber function.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageReserveStripeId(Relation rel)
|
||||
{
|
||||
LockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, false);
|
||||
|
||||
uint64 stripeId = metapage.reservedStripeId;
|
||||
metapage.reservedStripeId++;
|
||||
|
||||
ColumnarOverwriteMetapage(rel, metapage);
|
||||
|
||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
return stripeId;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageReserveData - reserve logical data offsets for writing.
|
||||
*/
|
||||
uint64
|
||||
ColumnarStorageReserveData(Relation rel, uint64 amount)
|
||||
{
|
||||
if (amount == 0)
|
||||
{
|
||||
return ColumnarInvalidLogicalOffset;
|
||||
}
|
||||
|
||||
LockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, false);
|
||||
|
||||
uint64 alignedReservation = AlignReservation(metapage.reservedOffset);
|
||||
uint64 nextReservation = alignedReservation + amount;
|
||||
metapage.reservedOffset = nextReservation;
|
||||
|
||||
/* write new reservation */
|
||||
ColumnarOverwriteMetapage(rel, metapage);
|
||||
|
||||
/* last used PhysicalAddr of new reservation */
|
||||
PhysicalAddr final = LogicalToPhysical(nextReservation - 1);
|
||||
|
||||
/* extend with new pages */
|
||||
BlockNumber nblocks = smgrnblocks(RelationGetSmgr(rel), MAIN_FORKNUM);
|
||||
|
||||
while (nblocks <= final.blockno)
|
||||
{
|
||||
Buffer newBuffer = ReadBuffer(rel, P_NEW);
|
||||
Assert(BufferGetBlockNumber(newBuffer) == nblocks);
|
||||
ReleaseBuffer(newBuffer);
|
||||
nblocks++;
|
||||
}
|
||||
|
||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
return alignedReservation;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageRead - map the logical offset to a block and offset, then
|
||||
* read the buffer from multiple blocks if necessary.
|
||||
*/
|
||||
void
|
||||
ColumnarStorageRead(Relation rel, uint64 logicalOffset, char *data, uint32 amount)
|
||||
{
|
||||
/* if there's no work to do, succeed even with invalid offset */
|
||||
if (amount == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ColumnarLogicalOffsetIsValid(logicalOffset))
|
||||
{
|
||||
elog(ERROR,
|
||||
"attempted columnar read on relation %d from invalid logical offset: "
|
||||
UINT64_FORMAT,
|
||||
rel->rd_id, logicalOffset);
|
||||
}
|
||||
|
||||
uint64 read = 0;
|
||||
|
||||
while (read < amount)
|
||||
{
|
||||
PhysicalAddr addr = LogicalToPhysical(logicalOffset + read);
|
||||
|
||||
uint32 to_read = Min(amount - read, BLCKSZ - addr.offset);
|
||||
ReadFromBlock(rel, addr.blockno, addr.offset, data + read, to_read,
|
||||
false);
|
||||
|
||||
read += to_read;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageWrite - map the logical offset to a block and offset, then
|
||||
* write the buffer across multiple blocks if necessary.
|
||||
*/
|
||||
void
|
||||
ColumnarStorageWrite(Relation rel, uint64 logicalOffset, char *data, uint32 amount)
|
||||
{
|
||||
/* if there's no work to do, succeed even with invalid offset */
|
||||
if (amount == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ColumnarLogicalOffsetIsValid(logicalOffset))
|
||||
{
|
||||
elog(ERROR,
|
||||
"attempted columnar write on relation %d to invalid logical offset: "
|
||||
UINT64_FORMAT,
|
||||
rel->rd_id, logicalOffset);
|
||||
}
|
||||
|
||||
uint64 written = 0;
|
||||
|
||||
while (written < amount)
|
||||
{
|
||||
PhysicalAddr addr = LogicalToPhysical(logicalOffset + written);
|
||||
|
||||
uint64 to_write = Min(amount - written, BLCKSZ - addr.offset);
|
||||
WriteToBlock(rel, addr.blockno, addr.offset, data + written, to_write,
|
||||
false);
|
||||
|
||||
written += to_write;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarStorageTruncate - truncate the columnar storage such that
|
||||
* newDataReservation will be the first unused logical offset available. Free
|
||||
* pages at the end of the relation.
|
||||
*
|
||||
* Caller must hold AccessExclusiveLock on the relation.
|
||||
*
|
||||
* Returns true if pages were truncated; false otherwise.
|
||||
*/
|
||||
bool
|
||||
ColumnarStorageTruncate(Relation rel, uint64 newDataReservation)
|
||||
{
|
||||
if (!ColumnarLogicalOffsetIsValid(newDataReservation))
|
||||
{
|
||||
elog(ERROR,
|
||||
"attempted to truncate relation %d to invalid logical offset: " UINT64_FORMAT,
|
||||
rel->rd_id, newDataReservation);
|
||||
}
|
||||
|
||||
BlockNumber old_rel_pages = smgrnblocks(RelationGetSmgr(rel), MAIN_FORKNUM);
|
||||
if (old_rel_pages == 0)
|
||||
{
|
||||
/* nothing to do */
|
||||
return false;
|
||||
}
|
||||
|
||||
LockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(rel, false);
|
||||
|
||||
if (metapage.reservedOffset < newDataReservation)
|
||||
{
|
||||
elog(ERROR,
|
||||
"attempted to truncate relation %d to offset " UINT64_FORMAT \
|
||||
" which is higher than existing offset " UINT64_FORMAT,
|
||||
rel->rd_id, newDataReservation, metapage.reservedOffset);
|
||||
}
|
||||
|
||||
if (metapage.reservedOffset == newDataReservation)
|
||||
{
|
||||
/* nothing to do */
|
||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||
return false;
|
||||
}
|
||||
|
||||
metapage.reservedOffset = newDataReservation;
|
||||
|
||||
/* write new reservation */
|
||||
ColumnarOverwriteMetapage(rel, metapage);
|
||||
|
||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||
|
||||
PhysicalAddr final = LogicalToPhysical(newDataReservation - 1);
|
||||
BlockNumber new_rel_pages = final.blockno + 1;
|
||||
Assert(new_rel_pages <= old_rel_pages);
|
||||
|
||||
/*
|
||||
* Truncate the storage. Note that RelationTruncate() takes care of
|
||||
* Write Ahead Logging.
|
||||
*/
|
||||
if (new_rel_pages < old_rel_pages)
|
||||
{
|
||||
RelationTruncate(rel, new_rel_pages);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarOverwriteMetapage writes given columnarMetapage back to metapage
|
||||
* for given relation.
|
||||
*/
|
||||
static void
|
||||
ColumnarOverwriteMetapage(Relation relation, ColumnarMetapage columnarMetapage)
|
||||
{
|
||||
/* clear metapage because we are overwriting */
|
||||
bool clear = true;
|
||||
WriteToBlock(relation, COLUMNAR_METAPAGE_BLOCKNO, SizeOfPageHeaderData,
|
||||
(char *) &columnarMetapage, sizeof(ColumnarMetapage), clear);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarMetapageRead - read the current contents of the metapage. Error if
|
||||
* it does not exist. Throw an error if the metapage is not the current
|
||||
* version, unless 'force' is true.
|
||||
*
|
||||
* NB: it's safe to read a different version of a metapage because we
|
||||
* guarantee that fields will only be added and existing fields will never be
|
||||
* changed. However, it's important that we don't depend on new fields being
|
||||
* set properly when we read an old metapage; an old metapage should only be
|
||||
* read for the purposes of upgrading or error checking.
|
||||
*/
|
||||
static ColumnarMetapage
|
||||
ColumnarMetapageRead(Relation rel, bool force)
|
||||
{
|
||||
BlockNumber nblocks = smgrnblocks(RelationGetSmgr(rel), MAIN_FORKNUM);
|
||||
if (nblocks == 0)
|
||||
{
|
||||
/*
|
||||
* We only expect this to happen when upgrading citus.so. This is because,
|
||||
* in current version of columnar, we immediately create the metapage
|
||||
* for columnar tables, i.e right after creating the table.
|
||||
* However in older versions, we were creating metapages lazily, i.e
|
||||
* when ingesting data to columnar table.
|
||||
*/
|
||||
ereport(ERROR, (errmsg("columnar metapage for relation \"%s\" does not exist",
|
||||
RelationGetRelationName(rel)),
|
||||
errhint(OLD_METAPAGE_VERSION_HINT)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Regardless of "force" parameter, always force read metapage block.
|
||||
* We will check metapage version in ColumnarMetapageCheckVersion
|
||||
* depending on "force".
|
||||
*/
|
||||
bool forceReadBlock = true;
|
||||
ColumnarMetapage metapage;
|
||||
ReadFromBlock(rel, COLUMNAR_METAPAGE_BLOCKNO, SizeOfPageHeaderData,
|
||||
(char *) &metapage, sizeof(ColumnarMetapage), forceReadBlock);
|
||||
|
||||
if (!force)
|
||||
{
|
||||
ColumnarMetapageCheckVersion(rel, &metapage);
|
||||
}
|
||||
|
||||
return metapage;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReadFromBlock - read bytes from a page at the given offset. If 'force' is
|
||||
* true, don't check pd_lower; useful when reading a metapage of unknown
|
||||
* version.
|
||||
*/
|
||||
static void
|
||||
ReadFromBlock(Relation rel, BlockNumber blockno, uint32 offset, char *buf,
|
||||
uint32 len, bool force)
|
||||
{
|
||||
Buffer buffer = ReadBuffer(rel, blockno);
|
||||
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||
Page page = BufferGetPage(buffer);
|
||||
PageHeader phdr = (PageHeader) page;
|
||||
|
||||
if (BLCKSZ < offset + len || (!force && (phdr->pd_lower < offset + len)))
|
||||
{
|
||||
elog(ERROR,
|
||||
"attempt to read columnar data of length %d from offset %d of block %d of relation %d",
|
||||
len, offset, blockno, rel->rd_id);
|
||||
}
|
||||
|
||||
memcpy_s(buf, len, page + offset, len);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* WriteToBlock - append data to a block, initializing if necessary, and emit
|
||||
* WAL. If 'clear' is true, always clear the data on the page and reinitialize
|
||||
* it first, and offset must be SizeOfPageHeaderData. Otherwise, offset must
|
||||
* be equal to pd_lower and pd_lower will be set to the end of the written
|
||||
* data.
|
||||
*/
|
||||
static void
|
||||
WriteToBlock(Relation rel, BlockNumber blockno, uint32 offset, char *buf,
|
||||
uint32 len, bool clear)
|
||||
{
|
||||
Buffer buffer = ReadBuffer(rel, blockno);
|
||||
GenericXLogState *state = GenericXLogStart(rel);
|
||||
|
||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
Page page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
|
||||
|
||||
PageHeader phdr = (PageHeader) page;
|
||||
if (PageIsNew(page) || clear)
|
||||
{
|
||||
PageInit(page, BLCKSZ, 0);
|
||||
}
|
||||
|
||||
if (phdr->pd_lower < offset || phdr->pd_upper - offset < len)
|
||||
{
|
||||
elog(ERROR,
|
||||
"attempt to write columnar data of length %d to offset %d of block %d of relation %d",
|
||||
len, offset, blockno, rel->rd_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* After a transaction has been rolled-back, we might be
|
||||
* over-writing the rolledback write, so phdr->pd_lower can be
|
||||
* different from addr.offset.
|
||||
*
|
||||
* We reset pd_lower to reset the rolledback write.
|
||||
*
|
||||
* Given that we always align page reservation to the next page as of
|
||||
* 10.2, having such a disk page is only possible if write operaion
|
||||
* failed in an older version of columnar, but now user attempts writing
|
||||
* to that table in version >= 10.2.
|
||||
*/
|
||||
if (phdr->pd_lower > offset)
|
||||
{
|
||||
ereport(DEBUG4, (errmsg("overwriting page %u", blockno),
|
||||
errdetail("This can happen after a roll-back.")));
|
||||
phdr->pd_lower = offset;
|
||||
}
|
||||
|
||||
memcpy_s(page + phdr->pd_lower, phdr->pd_upper - phdr->pd_lower, buf, len);
|
||||
phdr->pd_lower += len;
|
||||
|
||||
GenericXLogFinish(state);
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AlignReservation - given an unused logical byte offset, align it so that it
|
||||
* falls at the start of a page.
|
||||
*
|
||||
* XXX: Reconsider whether we want/need to do this at all.
|
||||
*/
|
||||
static uint64
|
||||
AlignReservation(uint64 prevReservation)
|
||||
{
|
||||
PhysicalAddr prevAddr = LogicalToPhysical(prevReservation);
|
||||
uint64 alignedReservation = prevReservation;
|
||||
|
||||
if (prevAddr.offset != SizeOfPageHeaderData)
|
||||
{
|
||||
/* not aligned; align on beginning of next page */
|
||||
PhysicalAddr initial = { 0 };
|
||||
initial.blockno = prevAddr.blockno + 1;
|
||||
initial.offset = SizeOfPageHeaderData;
|
||||
alignedReservation = PhysicalToLogical(initial);
|
||||
}
|
||||
|
||||
Assert(alignedReservation >= prevReservation);
|
||||
return alignedReservation;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarMetapageIsCurrent - is the metapage at the latest version?
|
||||
*/
|
||||
static bool
|
||||
ColumnarMetapageIsCurrent(ColumnarMetapage *metapage)
|
||||
{
|
||||
return (metapage->versionMajor == COLUMNAR_VERSION_MAJOR &&
|
||||
metapage->versionMinor == COLUMNAR_VERSION_MINOR);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarMetapageIsOlder - is the metapage older than the current version?
|
||||
*/
|
||||
static bool
|
||||
ColumnarMetapageIsOlder(ColumnarMetapage *metapage)
|
||||
{
|
||||
return (metapage->versionMajor < COLUMNAR_VERSION_MAJOR ||
|
||||
(metapage->versionMajor == COLUMNAR_VERSION_MAJOR &&
|
||||
(int) metapage->versionMinor < (int) COLUMNAR_VERSION_MINOR));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarMetapageIsNewer - is the metapage newer than the current version?
|
||||
*/
|
||||
static bool
|
||||
ColumnarMetapageIsNewer(ColumnarMetapage *metapage)
|
||||
{
|
||||
return (metapage->versionMajor > COLUMNAR_VERSION_MAJOR ||
|
||||
(metapage->versionMajor == COLUMNAR_VERSION_MAJOR &&
|
||||
metapage->versionMinor > COLUMNAR_VERSION_MINOR));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ColumnarMetapageCheckVersion - throw an error if accessing old
|
||||
* version of metapage.
|
||||
*/
|
||||
static void
|
||||
ColumnarMetapageCheckVersion(Relation rel, ColumnarMetapage *metapage)
|
||||
{
|
||||
if (!ColumnarMetapageIsCurrent(metapage))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg(
|
||||
"attempted to access relation \"%s\", which uses an older columnar format",
|
||||
RelationGetRelationName(rel)),
|
||||
errdetail(
|
||||
"Columnar format version %d.%d is required, \"%s\" has version %d.%d.",
|
||||
COLUMNAR_VERSION_MAJOR, COLUMNAR_VERSION_MINOR,
|
||||
RelationGetRelationName(rel),
|
||||
metapage->versionMajor, metapage->versionMinor),
|
||||
errhint(OLD_METAPAGE_VERSION_HINT)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* test_columnar_storage_write_new_page is a UDF only used for testing
|
||||
* purposes. It could make more sense to define this in columnar_debug.c,
|
||||
* but the storage layer doesn't expose ColumnarMetapage to any other files,
|
||||
* so we define it here.
|
||||
*/
|
||||
Datum
|
||||
test_columnar_storage_write_new_page(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
Relation relation = relation_open(relationId, AccessShareLock);
|
||||
|
||||
/*
|
||||
* Allocate a new page, write some data to there, and set reserved offset
|
||||
* to the start of that page. That way, for a subsequent write operation,
|
||||
* storage layer would try to overwrite the page that we allocated here.
|
||||
*/
|
||||
uint64 newPageOffset = ColumnarStorageGetReservedOffset(relation, false);
|
||||
|
||||
ColumnarStorageReserveData(relation, 100);
|
||||
ColumnarStorageWrite(relation, newPageOffset, "foo_bar", 8);
|
||||
|
||||
ColumnarMetapage metapage = ColumnarMetapageRead(relation, false);
|
||||
metapage.reservedOffset = newPageOffset;
|
||||
ColumnarOverwriteMetapage(relation, metapage);
|
||||
|
||||
relation_close(relation, AccessShareLock);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
|
@ -16,43 +16,32 @@
|
|||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "miscadmin.h"
|
||||
#include "safe_lib.h"
|
||||
|
||||
#include "access/heapam.h"
|
||||
#include "access/nbtree.h"
|
||||
#include "catalog/pg_am.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/fd.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/rel.h"
|
||||
|
||||
#include "pg_version_compat.h"
|
||||
#include "pg_version_constants.h"
|
||||
#include "utils/relfilenodemap.h"
|
||||
|
||||
#include "columnar/columnar.h"
|
||||
#include "columnar/columnar_storage.h"
|
||||
#include "columnar/columnar_version_compat.h"
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||
#include "storage/relfilelocator.h"
|
||||
#include "utils/relfilenumbermap.h"
|
||||
#else
|
||||
#include "utils/relfilenodemap.h"
|
||||
#endif
|
||||
|
||||
struct ColumnarWriteState
|
||||
{
|
||||
TupleDesc tupleDescriptor;
|
||||
FmgrInfo **comparisonFunctionArray;
|
||||
RelFileLocator relfilelocator;
|
||||
RelFileNode relfilenode;
|
||||
|
||||
MemoryContext stripeWriteContext;
|
||||
MemoryContext perTupleContext;
|
||||
StripeBuffers *stripeBuffers;
|
||||
StripeSkipList *stripeSkipList;
|
||||
EmptyStripeReservation *emptyStripeReservation;
|
||||
ColumnarOptions options;
|
||||
ChunkData *chunkData;
|
||||
|
||||
|
@ -93,7 +82,7 @@ static StringInfo CopyStringInfo(StringInfo sourceString);
|
|||
* data load operation.
|
||||
*/
|
||||
ColumnarWriteState *
|
||||
ColumnarBeginWrite(RelFileLocator relfilelocator,
|
||||
ColumnarBeginWrite(RelFileNode relfilenode,
|
||||
ColumnarOptions options,
|
||||
TupleDesc tupleDescriptor)
|
||||
{
|
||||
|
@ -127,19 +116,18 @@ ColumnarBeginWrite(RelFileLocator relfilelocator,
|
|||
ALLOCSET_DEFAULT_SIZES);
|
||||
|
||||
bool *columnMaskArray = palloc(columnCount * sizeof(bool));
|
||||
memset(columnMaskArray, true, columnCount * sizeof(bool));
|
||||
memset(columnMaskArray, true, columnCount);
|
||||
|
||||
ChunkData *chunkData = CreateEmptyChunkData(columnCount, columnMaskArray,
|
||||
options.chunkRowCount);
|
||||
|
||||
ColumnarWriteState *writeState = palloc0(sizeof(ColumnarWriteState));
|
||||
writeState->relfilelocator = relfilelocator;
|
||||
writeState->relfilenode = relfilenode;
|
||||
writeState->options = options;
|
||||
writeState->tupleDescriptor = CreateTupleDescCopy(tupleDescriptor);
|
||||
writeState->comparisonFunctionArray = comparisonFunctionArray;
|
||||
writeState->stripeBuffers = NULL;
|
||||
writeState->stripeSkipList = NULL;
|
||||
writeState->emptyStripeReservation = NULL;
|
||||
writeState->stripeWriteContext = stripeWriteContext;
|
||||
writeState->chunkData = chunkData;
|
||||
writeState->compressionBuffer = NULL;
|
||||
|
@ -158,10 +146,8 @@ ColumnarBeginWrite(RelFileLocator relfilelocator,
|
|||
* corresponding skip nodes. Then, whole chunk data is compressed at every
|
||||
* rowChunkCount insertion. Then, if row count exceeds stripeMaxRowCount, we flush
|
||||
* the stripe, and add its metadata to the table footer.
|
||||
*
|
||||
* Returns the "row number" assigned to written row.
|
||||
*/
|
||||
uint64
|
||||
void
|
||||
ColumnarWriteRow(ColumnarWriteState *writeState, Datum *columnValues, bool *columnNulls)
|
||||
{
|
||||
uint32 columnIndex = 0;
|
||||
|
@ -183,16 +169,6 @@ ColumnarWriteRow(ColumnarWriteState *writeState, Datum *columnValues, bool *colu
|
|||
writeState->stripeSkipList = stripeSkipList;
|
||||
writeState->compressionBuffer = makeStringInfo();
|
||||
|
||||
Oid relationId = RelidByRelfilenumber(RelationTablespace_compat(
|
||||
writeState->relfilelocator),
|
||||
RelationPhysicalIdentifierNumber_compat(
|
||||
writeState->relfilelocator));
|
||||
Relation relation = relation_open(relationId, NoLock);
|
||||
writeState->emptyStripeReservation =
|
||||
ReserveEmptyStripe(relation, columnCount, chunkRowCount,
|
||||
options->stripeRowCount);
|
||||
relation_close(relation, NoLock);
|
||||
|
||||
/*
|
||||
* serializedValueBuffer lives in stripe write memory context so it needs to be
|
||||
* initialized when the stripe is created.
|
||||
|
@ -249,8 +225,6 @@ ColumnarWriteRow(ColumnarWriteState *writeState, Datum *columnValues, bool *colu
|
|||
SerializeChunkData(writeState, chunkIndex, chunkRowCount);
|
||||
}
|
||||
|
||||
uint64 writtenRowNumber = writeState->emptyStripeReservation->stripeFirstRowNumber +
|
||||
stripeBuffers->rowCount;
|
||||
stripeBuffers->rowCount++;
|
||||
if (stripeBuffers->rowCount >= options->stripeRowCount)
|
||||
{
|
||||
|
@ -258,8 +232,6 @@ ColumnarWriteRow(ColumnarWriteState *writeState, Datum *columnValues, bool *colu
|
|||
}
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
|
||||
return writtenRowNumber;
|
||||
}
|
||||
|
||||
|
||||
|
@ -379,6 +351,80 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 chunkRowCount,
|
|||
}
|
||||
|
||||
|
||||
void
|
||||
WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
|
||||
{
|
||||
uint64 remaining = dataLength;
|
||||
Buffer buffer;
|
||||
|
||||
while (remaining > 0)
|
||||
{
|
||||
SmgrAddr addr = logical_to_smgr(logicalOffset);
|
||||
|
||||
RelationOpenSmgr(rel);
|
||||
BlockNumber nblocks PG_USED_FOR_ASSERTS_ONLY =
|
||||
smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
|
||||
Assert(addr.blockno < nblocks);
|
||||
RelationCloseSmgr(rel);
|
||||
|
||||
buffer = ReadBuffer(rel, addr.blockno);
|
||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
Page page = BufferGetPage(buffer);
|
||||
PageHeader phdr = (PageHeader) page;
|
||||
if (PageIsNew(page))
|
||||
{
|
||||
PageInit(page, BLCKSZ, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* After a transaction has been rolled-back, we might be
|
||||
* over-writing the rolledback write, so phdr->pd_lower can be
|
||||
* different from addr.offset.
|
||||
*
|
||||
* We reset pd_lower to reset the rolledback write.
|
||||
*/
|
||||
if (phdr->pd_lower > addr.offset)
|
||||
{
|
||||
ereport(DEBUG1, (errmsg("over-writing page %u", addr.blockno),
|
||||
errdetail("This can happen after a roll-back.")));
|
||||
phdr->pd_lower = addr.offset;
|
||||
}
|
||||
Assert(phdr->pd_lower == addr.offset);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
uint64 to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining);
|
||||
memcpy_s(page + phdr->pd_lower, phdr->pd_upper - phdr->pd_lower, data, to_write);
|
||||
phdr->pd_lower += to_write;
|
||||
|
||||
MarkBufferDirty(buffer);
|
||||
|
||||
if (RelationNeedsWAL(rel))
|
||||
{
|
||||
XLogBeginInsert();
|
||||
|
||||
/*
|
||||
* Since columnar will mostly write whole pages we force the transmission of the
|
||||
* whole image in the buffer
|
||||
*/
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE);
|
||||
|
||||
XLogRecPtr recptr = XLogInsert(RM_GENERIC_ID, 0);
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
data += to_write;
|
||||
remaining -= to_write;
|
||||
logicalOffset += to_write;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* FlushStripe flushes current stripe data into the file. The function first ensures
|
||||
* the last data chunk for each column is properly serialized and compressed. Then,
|
||||
|
@ -388,6 +434,7 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 chunkRowCount,
|
|||
static void
|
||||
FlushStripe(ColumnarWriteState *writeState)
|
||||
{
|
||||
StripeMetadata stripeMetadata = { 0 };
|
||||
uint32 columnIndex = 0;
|
||||
uint32 chunkIndex = 0;
|
||||
StripeBuffers *stripeBuffers = writeState->stripeBuffers;
|
||||
|
@ -404,10 +451,8 @@ FlushStripe(ColumnarWriteState *writeState)
|
|||
|
||||
elog(DEBUG1, "Flushing Stripe of size %d", stripeBuffers->rowCount);
|
||||
|
||||
Oid relationId = RelidByRelfilenumber(RelationTablespace_compat(
|
||||
writeState->relfilelocator),
|
||||
RelationPhysicalIdentifierNumber_compat(
|
||||
writeState->relfilelocator));
|
||||
Oid relationId = RelidByRelfilenode(writeState->relfilenode.spcNode,
|
||||
writeState->relfilenode.relNode);
|
||||
Relation relation = relation_open(relationId, NoLock);
|
||||
|
||||
/*
|
||||
|
@ -455,11 +500,11 @@ FlushStripe(ColumnarWriteState *writeState)
|
|||
}
|
||||
}
|
||||
|
||||
StripeMetadata *stripeMetadata =
|
||||
CompleteStripeReservation(relation, writeState->emptyStripeReservation->stripeId,
|
||||
stripeSize, stripeRowCount, chunkCount);
|
||||
stripeMetadata = ReserveStripe(relation, stripeSize,
|
||||
stripeRowCount, columnCount, chunkCount,
|
||||
chunkRowCount);
|
||||
|
||||
uint64 currentFileOffset = stripeMetadata->fileOffset;
|
||||
uint64 currentFileOffset = stripeMetadata.fileOffset;
|
||||
|
||||
/*
|
||||
* Each stripe has only one section:
|
||||
|
@ -482,7 +527,7 @@ FlushStripe(ColumnarWriteState *writeState)
|
|||
columnBuffers->chunkBuffersArray[chunkIndex];
|
||||
StringInfo existsBuffer = chunkBuffers->existsBuffer;
|
||||
|
||||
ColumnarStorageWrite(relation, currentFileOffset,
|
||||
WriteToSmgr(relation, currentFileOffset,
|
||||
existsBuffer->data, existsBuffer->len);
|
||||
currentFileOffset += existsBuffer->len;
|
||||
}
|
||||
|
@ -493,17 +538,17 @@ FlushStripe(ColumnarWriteState *writeState)
|
|||
columnBuffers->chunkBuffersArray[chunkIndex];
|
||||
StringInfo valueBuffer = chunkBuffers->valueBuffer;
|
||||
|
||||
ColumnarStorageWrite(relation, currentFileOffset,
|
||||
WriteToSmgr(relation, currentFileOffset,
|
||||
valueBuffer->data, valueBuffer->len);
|
||||
currentFileOffset += valueBuffer->len;
|
||||
}
|
||||
}
|
||||
|
||||
SaveChunkGroups(writeState->relfilelocator,
|
||||
stripeMetadata->id,
|
||||
SaveChunkGroups(writeState->relfilenode,
|
||||
stripeMetadata.id,
|
||||
writeState->chunkGroupRowCounts);
|
||||
SaveStripeSkipList(writeState->relfilelocator,
|
||||
stripeMetadata->id,
|
||||
SaveStripeSkipList(writeState->relfilenode,
|
||||
stripeMetadata.id,
|
||||
stripeSkipList, tupleDescriptor);
|
||||
|
||||
writeState->chunkGroupRowCounts = NIL;
|
||||
|
@ -520,7 +565,7 @@ static StringInfo
|
|||
SerializeBoolArray(bool *boolArray, uint32 boolArrayLength)
|
||||
{
|
||||
uint32 boolArrayIndex = 0;
|
||||
uint32 byteCount = ((boolArrayLength * sizeof(bool)) + (8 - sizeof(bool))) / 8;
|
||||
uint32 byteCount = (boolArrayLength + 7) / 8;
|
||||
|
||||
StringInfo boolArrayBuffer = makeStringInfo();
|
||||
enlargeStringInfo(boolArrayBuffer, byteCount);
|
||||
|
@ -544,9 +589,6 @@ SerializeBoolArray(bool *boolArray, uint32 boolArrayLength)
|
|||
/*
|
||||
* SerializeSingleDatum serializes the given datum value and appends it to the
|
||||
* provided string info buffer.
|
||||
*
|
||||
* Since we don't want to limit datum buffer size to RSIZE_MAX unnecessarily,
|
||||
* we use memcpy instead of memcpy_s several places in this function.
|
||||
*/
|
||||
static void
|
||||
SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue,
|
||||
|
@ -568,13 +610,15 @@ SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue,
|
|||
}
|
||||
else
|
||||
{
|
||||
memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumTypeLength); /* IGNORE-BANNED */
|
||||
memcpy_s(currentDatumDataPointer, datumBuffer->maxlen - datumBuffer->len,
|
||||
DatumGetPointer(datum), datumTypeLength);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(!datumTypeByValue);
|
||||
memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumLength); /* IGNORE-BANNED */
|
||||
memcpy_s(currentDatumDataPointer, datumBuffer->maxlen - datumBuffer->len,
|
||||
DatumGetPointer(datum), datumLength);
|
||||
}
|
||||
|
||||
datumBuffer->len += datumLengthAligned;
|
||||
|
@ -728,12 +772,7 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength)
|
|||
{
|
||||
uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum);
|
||||
char *datumData = palloc0(datumLength);
|
||||
|
||||
/*
|
||||
* We use IGNORE-BANNED here since we don't want to limit datum size to
|
||||
* RSIZE_MAX unnecessarily.
|
||||
*/
|
||||
memcpy(datumData, DatumGetPointer(datum), datumLength); /* IGNORE-BANNED */
|
||||
memcpy_s(datumData, datumLength, DatumGetPointer(datum), datumLength);
|
||||
|
||||
datumCopy = PointerGetDatum(datumData);
|
||||
}
|
||||
|
@ -756,12 +795,8 @@ CopyStringInfo(StringInfo sourceString)
|
|||
targetString->data = palloc0(sourceString->len);
|
||||
targetString->len = sourceString->len;
|
||||
targetString->maxlen = sourceString->len;
|
||||
|
||||
/*
|
||||
* We use IGNORE-BANNED here since we don't want to limit string
|
||||
* buffer size to RSIZE_MAX unnecessarily.
|
||||
*/
|
||||
memcpy(targetString->data, sourceString->data, sourceString->len); /* IGNORE-BANNED */
|
||||
memcpy_s(targetString->data, sourceString->len,
|
||||
sourceString->data, sourceString->len);
|
||||
}
|
||||
|
||||
return targetString;
|
||||
|
|
|
@ -18,15 +18,20 @@
|
|||
#include "citus_version.h"
|
||||
|
||||
#include "columnar/columnar.h"
|
||||
#include "columnar/mod.h"
|
||||
|
||||
#include "columnar/columnar_tableam.h"
|
||||
|
||||
void
|
||||
columnar_init(void)
|
||||
{
|
||||
columnar_init_gucs();
|
||||
columnar_tableam_init();
|
||||
}
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
void _PG_init(void);
|
||||
|
||||
void
|
||||
_PG_init(void)
|
||||
columnar_fini(void)
|
||||
{
|
||||
columnar_init();
|
||||
columnar_tableam_finish();
|
||||
}
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
../../../vendor/safestringlib/safeclib/
|
|
@ -1,32 +0,0 @@
|
|||
-- add columnar objects back
|
||||
ALTER EXTENSION citus_columnar ADD SCHEMA columnar;
|
||||
ALTER EXTENSION citus_columnar ADD SCHEMA columnar_internal;
|
||||
ALTER EXTENSION citus_columnar ADD SEQUENCE columnar_internal.storageid_seq;
|
||||
ALTER EXTENSION citus_columnar ADD TABLE columnar_internal.options;
|
||||
ALTER EXTENSION citus_columnar ADD TABLE columnar_internal.stripe;
|
||||
ALTER EXTENSION citus_columnar ADD TABLE columnar_internal.chunk_group;
|
||||
ALTER EXTENSION citus_columnar ADD TABLE columnar_internal.chunk;
|
||||
|
||||
ALTER EXTENSION citus_columnar ADD FUNCTION columnar_internal.columnar_handler;
|
||||
ALTER EXTENSION citus_columnar ADD ACCESS METHOD columnar;
|
||||
ALTER EXTENSION citus_columnar ADD FUNCTION pg_catalog.alter_columnar_table_set;
|
||||
ALTER EXTENSION citus_columnar ADD FUNCTION pg_catalog.alter_columnar_table_reset;
|
||||
|
||||
ALTER EXTENSION citus_columnar ADD FUNCTION citus_internal.upgrade_columnar_storage;
|
||||
ALTER EXTENSION citus_columnar ADD FUNCTION citus_internal.downgrade_columnar_storage;
|
||||
ALTER EXTENSION citus_columnar ADD FUNCTION citus_internal.columnar_ensure_am_depends_catalog;
|
||||
|
||||
ALTER EXTENSION citus_columnar ADD FUNCTION columnar.get_storage_id;
|
||||
ALTER EXTENSION citus_columnar ADD VIEW columnar.storage;
|
||||
ALTER EXTENSION citus_columnar ADD VIEW columnar.options;
|
||||
ALTER EXTENSION citus_columnar ADD VIEW columnar.stripe;
|
||||
ALTER EXTENSION citus_columnar ADD VIEW columnar.chunk_group;
|
||||
ALTER EXTENSION citus_columnar ADD VIEW columnar.chunk;
|
||||
|
||||
-- move citus_internal functions to columnar_internal
|
||||
|
||||
ALTER FUNCTION citus_internal.upgrade_columnar_storage(regclass) SET SCHEMA columnar_internal;
|
||||
ALTER FUNCTION citus_internal.downgrade_columnar_storage(regclass) SET SCHEMA columnar_internal;
|
||||
ALTER FUNCTION citus_internal.columnar_ensure_am_depends_catalog() SET SCHEMA columnar_internal;
|
||||
|
||||
|
|
@ -1 +0,0 @@
|
|||
-- fake sql file 'Y'
|
|
@ -1,19 +0,0 @@
|
|||
-- citus_columnar--11.1-1--11.2-1

#include "udfs/columnar_ensure_am_depends_catalog/11.2-1.sql"

-- Remove the normal ('n') dependency rows that point from the columnar table
-- access method to the indexes of the columnar_internal metadata tables.
-- Each index is listed exactly once; the original list repeated
-- stripe_first_row_number_idx, which was redundant.
DELETE FROM pg_depend
WHERE classid = 'pg_am'::regclass::oid
    AND objid IN (select oid from pg_am where amname = 'columnar')
    AND objsubid = 0
    AND refclassid = 'pg_class'::regclass::oid
    AND refobjid IN (
        'columnar_internal.stripe_first_row_number_idx'::regclass::oid,
        'columnar_internal.chunk_group_pkey'::regclass::oid,
        'columnar_internal.chunk_pkey'::regclass::oid,
        'columnar_internal.options_pkey'::regclass::oid,
        'columnar_internal.stripe_pkey'::regclass::oid
    )
    AND refobjsubid = 0
    AND deptype = 'n';
|
|
@ -1,435 +0,0 @@
|
|||
-- Guard: when this file is fed to psql directly, print a hint and stop;
-- it is meant to be run through CREATE EXTENSION.
\echo Use "CREATE EXTENSION citus_columnar" to load this file. \quit

-- columnar--9.5-1--10.0-1.sql

CREATE SCHEMA IF NOT EXISTS columnar;

-- Unqualified object names below resolve to the columnar schema.
SET search_path = columnar;

-- Sequence for storage ids; values start at 10000000000 and never wrap.
CREATE SEQUENCE IF NOT EXISTS storageid_seq
    MINVALUE 10000000000
    NO CYCLE;
|
||||
|
||||
-- Per-table columnar settings, one row per relation (keyed by its regclass).
CREATE TABLE IF NOT EXISTS options (
    regclass              regclass NOT NULL,
    chunk_group_row_limit integer  NOT NULL,
    stripe_row_limit      integer  NOT NULL,
    compression_level     integer  NOT NULL,
    compression           name     NOT NULL,
    PRIMARY KEY (regclass)
) WITH (user_catalog_table = true);

COMMENT ON TABLE options
    IS 'columnar table specific options, maintained by alter_columnar_table_set';
|
||||
|
||||
-- One row per stripe of a columnar storage. A stripe is identified by
-- (storage_id, stripe_num), and first_row_number is unique within a storage.
CREATE TABLE IF NOT EXISTS stripe (
    storage_id        bigint  NOT NULL,
    stripe_num        bigint  NOT NULL,
    file_offset       bigint  NOT NULL,
    data_length       bigint  NOT NULL,
    column_count      integer NOT NULL,
    chunk_row_count   integer NOT NULL,
    row_count         bigint  NOT NULL,
    chunk_group_count integer NOT NULL,
    first_row_number  bigint  NOT NULL,
    PRIMARY KEY (storage_id, stripe_num),
    CONSTRAINT stripe_first_row_number_idx
        UNIQUE (storage_id, first_row_number)
) WITH (user_catalog_table = true);

COMMENT ON TABLE stripe IS 'Columnar per stripe metadata';
|
||||
|
||||
-- One row per chunk group, identified by (storage_id, stripe_num,
-- chunk_group_num). Unlike the sibling metadata tables, this one is created
-- without the user_catalog_table storage option.
CREATE TABLE IF NOT EXISTS chunk_group (
    storage_id      bigint  NOT NULL,
    stripe_num      bigint  NOT NULL,
    chunk_group_num integer NOT NULL,
    row_count       bigint  NOT NULL,
    PRIMARY KEY (storage_id, stripe_num, chunk_group_num)
);

COMMENT ON TABLE chunk_group IS 'Columnar chunk group metadata';
|
||||
|
||||
-- One row per (storage, stripe, attribute, chunk group). minimum_value and
-- maximum_value are the only nullable columns.
CREATE TABLE IF NOT EXISTS chunk (
    storage_id                bigint  NOT NULL,
    stripe_num                bigint  NOT NULL,
    attr_num                  integer NOT NULL,
    chunk_group_num           integer NOT NULL,
    minimum_value             bytea,
    maximum_value             bytea,
    value_stream_offset       bigint  NOT NULL,
    value_stream_length       bigint  NOT NULL,
    exists_stream_offset      bigint  NOT NULL,
    exists_stream_length      bigint  NOT NULL,
    value_compression_type    integer NOT NULL,
    value_compression_level   integer NOT NULL,
    value_decompressed_length bigint  NOT NULL,
    value_count               bigint  NOT NULL,
    PRIMARY KEY (storage_id, stripe_num, attr_num, chunk_group_num)
) WITH (user_catalog_table = true);

COMMENT ON TABLE chunk IS 'Columnar per chunk metadata';
|
||||
|
||||
-- Create the handler function, the columnar access method and the
-- alter_columnar_table_set/reset UDFs, but only on PostgreSQL 12 or newer.
-- Per the original note: older servers have no table AM support, so nothing is
-- created here and citus_finish_pg_upgrade is relied upon to add the objects.
DO $proc$
DECLARE
    -- leading run of digits of server_version, i.e. the major version number
    pg_major int := substring(current_Setting('server_version'), '\d+')::int;
BEGIN
    IF pg_major >= 12 THEN
        -- Everything is passed as one string so that older servers never have
        -- to parse the table AM syntax.
        EXECUTE $$
--#include "udfs/columnar_handler/10.0-1.sql"
CREATE OR REPLACE FUNCTION columnar.columnar_handler(internal)
RETURNS table_am_handler
LANGUAGE C
AS 'MODULE_PATHNAME', 'columnar_handler';
COMMENT ON FUNCTION columnar.columnar_handler(internal)
IS 'internal function returning the handler for columnar tables';

-- postgres 11.8 does not support the syntax for table am, also it is seemingly trying
-- to parse the upgrade file and erroring on unknown syntax.
-- normally this section would not execute on postgres 11 anyway. To trick it to pass on
-- 11.8 we wrap the statement in a plpgsql block together with an EXECUTE. This is valid
-- syntax on 11.8 and will execute correctly in 12
DO $create_table_am$
BEGIN
EXECUTE 'CREATE ACCESS METHOD columnar TYPE TABLE HANDLER columnar.columnar_handler';
END $create_table_am$;

--#include "udfs/alter_columnar_table_set/10.0-1.sql"
CREATE OR REPLACE FUNCTION pg_catalog.alter_columnar_table_set(
table_name regclass,
chunk_group_row_limit int DEFAULT NULL,
stripe_row_limit int DEFAULT NULL,
compression name DEFAULT null,
compression_level int DEFAULT NULL)
RETURNS void
LANGUAGE C
AS 'MODULE_PATHNAME', 'alter_columnar_table_set';

COMMENT ON FUNCTION pg_catalog.alter_columnar_table_set(
table_name regclass,
chunk_group_row_limit int,
stripe_row_limit int,
compression name,
compression_level int)
IS 'set one or more options on a columnar table, when set to NULL no change is made';


--#include "udfs/alter_columnar_table_reset/10.0-1.sql"
CREATE OR REPLACE FUNCTION pg_catalog.alter_columnar_table_reset(
table_name regclass,
chunk_group_row_limit bool DEFAULT false,
stripe_row_limit bool DEFAULT false,
compression bool DEFAULT false,
compression_level bool DEFAULT false)
RETURNS void
LANGUAGE C
AS 'MODULE_PATHNAME', 'alter_columnar_table_reset';

COMMENT ON FUNCTION pg_catalog.alter_columnar_table_reset(
table_name regclass,
chunk_group_row_limit bool,
stripe_row_limit bool,
compression bool,
compression_level bool)
IS 'reset on or more options on a columnar table to the system defaults';

$$;
    END IF;
END$proc$;
|
||||
|
||||
-- (this function being dropped in 10.0.3)->#include "udfs/columnar_ensure_objects_exist/10.0-1.sql"
|
||||
|
||||
-- Stop resolving unqualified names against the columnar schema.
RESET search_path;

-- columnar--10.0-1--10.0-2.sql
-- Let every role use the columnar schema and read the tables in it.
GRANT USAGE ON SCHEMA columnar TO PUBLIC;
GRANT SELECT ON ALL TABLES IN SCHEMA columnar TO PUBLIC;
|
||||
|
||||
-- columnar--10.0-3--10.1-1.sql
|
||||
|
||||
-- Drop foreign keys between columnar metadata tables.
|
||||
|
||||
|
||||
-- columnar--10.1-1--10.2-1.sql
|
||||
|
||||
-- For a proper mapping between tid & (stripe, row_num), add a new column to
|
||||
-- columnar.stripe and define a BTREE index on this column.
|
||||
-- Also include storage_id column for per-relation scans.
|
||||
|
||||
|
||||
-- Populate first_row_number column of columnar.stripe table.
|
||||
--
|
||||
-- For simplicity, we calculate MAX(row_count) value across all the stripes
|
||||
-- of all the columnar tables and then use it to populate first_row_number
|
||||
-- column. This would introduce some gaps however we are okay with that since
|
||||
-- it's already the case with regular INSERT/COPY's.
|
||||
-- Assign first_row_number to every stripe, spacing consecutive stripe numbers
-- apart by the largest row_count found in columnar.stripe.
DO $$
DECLARE
    -- this should be equal to columnar_storage.h/COLUMNAR_FIRST_ROW_NUMBER
    first_row constant bigint := 1;
    -- largest row_count over all stripes; NULL when the table is empty
    widest_stripe bigint := (SELECT MAX(row_count) FROM columnar.stripe);
BEGIN
    UPDATE columnar.stripe
       SET first_row_number = first_row + (stripe_num - 1) * widest_stripe;
END;
$$;
|
||||
|
||||
-- columnar--10.2-1--10.2-2.sql
|
||||
|
||||
-- columnar.chunk stores per-chunk minimum/maximum values, so unprivileged
-- users must not be able to read it.
REVOKE SELECT ON TABLE columnar.chunk FROM PUBLIC;
|
||||
|
||||
|
||||
-- columnar--10.2-2--10.2-3.sql
|
||||
|
||||
-- Since stripe_first_row_number_idx is required to scan a columnar table, we
|
||||
-- need to make sure that it is created before doing anything with columnar
|
||||
-- tables during pg upgrades.
|
||||
--
|
||||
-- However, a plain btree index is not a dependency of a table, so pg_upgrade
|
||||
-- cannot guarantee that stripe_first_row_number_idx gets created when
|
||||
-- creating columnar.stripe, unless we make it a unique "constraint".
|
||||
--
|
||||
-- To do that, drop stripe_first_row_number_idx and create a unique
|
||||
-- constraint with the same name to keep the code change at minimum.
|
||||
|
||||
-- columnar--10.2-3--10.2-4.sql
|
||||
|
||||
|
||||
-- columnar--11.0-2--11.1-1.sql
|
||||
|
||||
-- Sets columnar options on a table by issuing
--   ALTER TABLE <table_name> SET (columnar.<option>=<value>, ...)
-- with one entry per non-NULL argument. When every option argument is NULL
-- no statement is executed.
CREATE OR REPLACE FUNCTION pg_catalog.alter_columnar_table_set(
    table_name regclass,
    chunk_group_row_limit int DEFAULT NULL,
    stripe_row_limit int DEFAULT NULL,
    compression name DEFAULT null,
    compression_level int DEFAULT NULL)
    RETURNS void
    LANGUAGE plpgsql AS
$alter_columnar_table_set$
declare
    -- "columnar.<option>=<value>" fragments, in the order they are emitted
    assignments text[] := ARRAY[]::text[];
begin
    if chunk_group_row_limit is not null then
        assignments := array_append(assignments,
            'columnar.chunk_group_row_limit=' || chunk_group_row_limit);
    end if;

    if stripe_row_limit is not null then
        assignments := array_append(assignments,
            'columnar.stripe_row_limit=' || stripe_row_limit);
    end if;

    if compression is not null then
        assignments := array_append(assignments,
            'columnar.compression=' || compression);
    end if;

    if compression_level is not null then
        assignments := array_append(assignments,
            'columnar.compression_level=' || compression_level);
    end if;

    -- nothing requested: leave the table untouched
    if cardinality(assignments) = 0 then
        return;
    end if;

    execute 'ALTER TABLE ' || table_name::text || ' SET ('
            || array_to_string(assignments, ', ') || ')';
    return;
end;
$alter_columnar_table_set$;

COMMENT ON FUNCTION pg_catalog.alter_columnar_table_set(
    table_name regclass,
    chunk_group_row_limit int,
    stripe_row_limit int,
    compression name,
    compression_level int)
IS 'set one or more options on a columnar table, when set to NULL no change is made';
|
||||
|
||||
-- Resets columnar options on a table by issuing
--   ALTER TABLE <table_name> RESET (columnar.<option>, ...)
-- with one entry per argument that is true. When no flag is true no
-- statement is executed.
CREATE OR REPLACE FUNCTION pg_catalog.alter_columnar_table_reset(
    table_name regclass,
    chunk_group_row_limit bool DEFAULT false,
    stripe_row_limit bool DEFAULT false,
    compression bool DEFAULT false,
    compression_level bool DEFAULT false)
    RETURNS void
    LANGUAGE plpgsql AS
$alter_columnar_table_reset$
declare
  -- stays true until at least one option has been appended to cmd
  noop BOOLEAN := true;
  cmd TEXT := 'ALTER TABLE ' || table_name::text || ' RESET (';
begin
  if (chunk_group_row_limit) then
    if (not noop) then cmd := cmd || ', '; end if;
    cmd := cmd || 'columnar.chunk_group_row_limit';
    noop := false;
  end if;
  if (stripe_row_limit) then
    if (not noop) then cmd := cmd || ', '; end if;
    cmd := cmd || 'columnar.stripe_row_limit';
    noop := false;
  end if;
  if (compression) then
    if (not noop) then cmd := cmd || ', '; end if;
    cmd := cmd || 'columnar.compression';
    noop := false;
  end if;
  if (compression_level) then
    if (not noop) then cmd := cmd || ', '; end if;
    cmd := cmd || 'columnar.compression_level';
    noop := false;
  end if;
  cmd := cmd || ')';
  -- only run the statement when at least one option was requested
  if (not noop) then
    execute cmd;
  end if;
  return;
end;
$alter_columnar_table_reset$;

-- The description previously read "reset on or more options"; typo fixed.
COMMENT ON FUNCTION pg_catalog.alter_columnar_table_reset(
    table_name regclass,
    chunk_group_row_limit bool,
    stripe_row_limit bool,
    compression bool,
    compression_level bool)
IS 'reset one or more options on a columnar table to the system defaults';
|
||||
|
||||
-- Turn the existing columnar schema into the locked-down columnar_internal
-- schema: strip PUBLIC's table privileges, rename, then strip PUBLIC's
-- privileges on the schema itself.
REVOKE ALL ON ALL TABLES IN SCHEMA columnar FROM PUBLIC;
ALTER SCHEMA columnar RENAME TO columnar_internal;
REVOKE ALL ON SCHEMA columnar_internal FROM PUBLIC;

-- Re-create columnar as a fresh schema that PUBLIC may use.
CREATE SCHEMA columnar;
GRANT USAGE ON SCHEMA columnar TO PUBLIC;
|
||||
|
||||
--#include "udfs/upgrade_columnar_storage/10.2-1.sql"
-- C-language function bound to the symbol upgrade_columnar_storage; being
-- STRICT, it is not called for a NULL argument.
CREATE OR REPLACE FUNCTION columnar_internal.upgrade_columnar_storage(rel regclass)
    RETURNS void
    LANGUAGE c
    STRICT
    AS 'MODULE_PATHNAME', 'upgrade_columnar_storage';

COMMENT ON FUNCTION columnar_internal.upgrade_columnar_storage(regclass)
    IS 'function to upgrade the columnar storage, if necessary';
|
||||
|
||||
|
||||
--#include "udfs/downgrade_columnar_storage/10.2-1.sql"
-- C-language function bound to the symbol downgrade_columnar_storage; being
-- STRICT, it is not called for a NULL argument.
CREATE OR REPLACE FUNCTION columnar_internal.downgrade_columnar_storage(rel regclass)
    RETURNS void
    LANGUAGE c
    STRICT
    AS 'MODULE_PATHNAME', 'downgrade_columnar_storage';

COMMENT ON FUNCTION columnar_internal.downgrade_columnar_storage(regclass)
    IS 'function to downgrade the columnar storage, if necessary';
|
||||
|
||||
-- update UDF to account for columnar_internal schema
--
-- Inserts a normal ('n') pg_depend row from the columnar table access method
-- to each listed metadata relation (tables, their indexes and the sequence).
-- The relations are looked up in columnar_internal when that schema exists,
-- otherwise in columnar. Rows already present in pg_depend are not inserted
-- again.
CREATE OR REPLACE FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
  RETURNS void
  LANGUAGE plpgsql
  SET search_path = pg_catalog
AS $func$
BEGIN
  INSERT INTO pg_depend
  SELECT -- dependent object: the columnar access method
         'pg_am'::regclass::oid as classid,
         (select oid from pg_am where amname = 'columnar') as objid,
         0 as objsubid,
         -- referenced object: one of the metadata relations found below
         'pg_class'::regclass::oid as refclassid,
         columnar_schema_members.relid as refobjid,
         0 as refobjsubid,
         'n' as deptype
  FROM (
         -- The relation names are spelled out because users may have created
         -- objects of their own in the schema.
         SELECT pg_class.oid AS relid
         FROM pg_class
         WHERE relnamespace =
               COALESCE(
                 (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar_internal'),
                 (SELECT pg_namespace.oid FROM pg_namespace WHERE nspname = 'columnar')
               )
           AND relname IN ('chunk',
                           'chunk_group',
                           'chunk_group_pkey',
                           'chunk_pkey',
                           'options',
                           'options_pkey',
                           'storageid_seq',
                           'stripe',
                           'stripe_first_row_number_idx',
                           'stripe_pkey')
       ) AS columnar_schema_members
  -- Avoid inserting duplicate entries into pg_depend.
  EXCEPT TABLE pg_depend;
END;
$func$;

COMMENT ON FUNCTION columnar_internal.columnar_ensure_am_depends_catalog()
  IS 'internal function responsible for creating dependencies from columnar '
     'table access method to the rel objects in columnar schema';

-- Record the dependencies right away.
SELECT columnar_internal.columnar_ensure_am_depends_catalog();
|
||||
|
||||
-- Utility function: C-language, STRICT, bound to the symbol
-- columnar_relation_storageid in the citus_columnar library.
CREATE FUNCTION columnar.get_storage_id(regclass)
    RETURNS bigint
    LANGUAGE C
    STRICT
    AS 'citus_columnar', 'columnar_relation_storageid';
|
||||
|
||||
-- create views for columnar table information

-- Maps each relation that uses the columnar access method to its storage id.
-- Only relations whose owner role the current user has USAGE on are listed.
CREATE VIEW columnar.storage WITH (security_barrier) AS
  SELECT c.oid::regclass AS relation,
         columnar.get_storage_id(c.oid) AS storage_id
    FROM pg_class c
    JOIN pg_am am ON am.oid = c.relam
   WHERE am.amname = 'columnar'
     AND pg_has_role(c.relowner, 'USAGE');

COMMENT ON VIEW columnar.storage IS 'Columnar relation ID to storage ID mapping.';

GRANT SELECT ON columnar.storage TO PUBLIC;
|
||||
|
||||
-- Exposes columnar_internal.options rows for relations whose owner role the
-- current user has USAGE on.
CREATE VIEW columnar.options WITH (security_barrier) AS
  SELECT o.regclass AS relation,
         o.chunk_group_row_limit,
         o.stripe_row_limit,
         o.compression,
         o.compression_level
    FROM columnar_internal.options o
    JOIN pg_class c ON c.oid = o.regclass
   WHERE pg_has_role(c.relowner, 'USAGE');

COMMENT ON VIEW columnar.options
  IS 'Columnar options for tables on which the current user has ownership privileges.';

GRANT SELECT ON columnar.options TO PUBLIC;
|
||||
|
||||
-- Stripe metadata restricted to the storages visible through columnar.storage.
CREATE VIEW columnar.stripe WITH (security_barrier) AS
  SELECT visible.relation,
         visible.storage_id,
         meta.stripe_num,
         meta.file_offset,
         meta.data_length,
         meta.column_count,
         meta.chunk_row_count,
         meta.row_count,
         meta.chunk_group_count,
         meta.first_row_number
    FROM columnar_internal.stripe meta
    JOIN columnar.storage visible ON meta.storage_id = visible.storage_id;

COMMENT ON VIEW columnar.stripe
  IS 'Columnar stripe information for tables on which the current user has ownership privileges.';

GRANT SELECT ON columnar.stripe TO PUBLIC;
|
||||
|
||||
-- Chunk group metadata restricted to the storages visible through
-- columnar.storage.
CREATE VIEW columnar.chunk_group WITH (security_barrier) AS
  SELECT visible.relation,
         visible.storage_id,
         meta.stripe_num,
         meta.chunk_group_num,
         meta.row_count
    FROM columnar_internal.chunk_group meta
    JOIN columnar.storage visible ON meta.storage_id = visible.storage_id;

COMMENT ON VIEW columnar.chunk_group
  IS 'Columnar chunk group information for tables on which the current user has ownership privileges.';

GRANT SELECT ON columnar.chunk_group TO PUBLIC;
|
||||
|
||||
-- Chunk metadata restricted to the storages visible through columnar.storage.
CREATE VIEW columnar.chunk WITH (security_barrier) AS
  SELECT visible.relation,
         visible.storage_id,
         meta.stripe_num,
         meta.attr_num,
         meta.chunk_group_num,
         meta.minimum_value,
         meta.maximum_value,
         meta.value_stream_offset,
         meta.value_stream_length,
         meta.exists_stream_offset,
         meta.exists_stream_length,
         meta.value_compression_type,
         meta.value_compression_level,
         meta.value_decompressed_length,
         meta.value_count
    FROM columnar_internal.chunk meta
    JOIN columnar.storage visible ON meta.storage_id = visible.storage_id;

COMMENT ON VIEW columnar.chunk
  IS 'Columnar chunk information for tables on which the current user has ownership privileges.';

GRANT SELECT ON columnar.chunk TO PUBLIC;
|
||||
|
|
@ -1 +0,0 @@
|
|||
-- citus_columnar--11.2-1--11.3-1
|
|
@ -1 +0,0 @@
|
|||
-- citus_columnar--11.3-1--12.2-1
|
|
@ -1,4 +1,4 @@
|
|||
-- columnar--10.0-1--10.0-2.sql
|
||||
/* columnar--10.0-1--10.0-2.sql */
|
||||
|
||||
-- grant read access for columnar metadata tables to unprivileged user
|
||||
GRANT USAGE ON SCHEMA columnar TO PUBLIC;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
-- columnar--10.0-3--10.1-1.sql
|
||||
/* columnar--10.0-3--10.1-1.sql */
|
||||
|
||||
-- Drop foreign keys between columnar metadata tables.
|
||||
-- Postgres assigns different names to those foreign keys in PG11, so act accordingly.
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
-- columnar--10.1-1--10.2-1.sql
|
||||
|
||||
-- To map a tid to (stripe, row_num), columnar.stripe gets a first_row_number
-- column. The btree index on it leads with storage_id so that scans can be
-- limited to a single relation.
ALTER TABLE columnar.stripe
    ADD COLUMN first_row_number bigint;

CREATE INDEX stripe_first_row_number_idx
    ON columnar.stripe
    USING btree (storage_id, first_row_number);
|
||||
|
||||
-- Populate first_row_number column of columnar.stripe table.
|
||||
--
|
||||
-- For simplicity, we calculate MAX(row_count) value across all the stripes
|
||||
-- of all the columnar tables and then use it to populate first_row_number
|
||||
-- column. This would introduce some gaps however we are okay with that since
|
||||
-- it's already the case with regular INSERT/COPY's.
|
||||
-- Assign first_row_number to every stripe, spacing consecutive stripe numbers
-- apart by the largest row_count found in columnar.stripe.
DO $$
DECLARE
    -- this should be equal to columnar_storage.h/COLUMNAR_FIRST_ROW_NUMBER
    first_row constant bigint := 1;
    -- largest row_count over all stripes; NULL when the table is empty
    widest_stripe bigint := (SELECT MAX(row_count) FROM columnar.stripe);
BEGIN
    UPDATE columnar.stripe
       SET first_row_number = first_row + (stripe_num - 1) * widest_stripe;
END;
$$;
|
||||
|
||||
#include "udfs/upgrade_columnar_storage/10.2-1.sql"
#include "udfs/downgrade_columnar_storage/10.2-1.sql"

-- Run the storage upgrade for every relation that uses the columnar access
-- method.
-- NOTE(review): PERFORM is a PL/pgSQL statement, so this file is presumably
-- included inside a PL/pgSQL block -- confirm against the including script.
PERFORM citus_internal.upgrade_columnar_storage(c.oid)
   FROM pg_class c
   JOIN pg_am a ON a.oid = c.relam
  WHERE a.amname = 'columnar';
|
|
@ -1,5 +0,0 @@
|
|||
-- columnar--10.2-1--10.2-2.sql
|
||||
|
||||
-- columnar.chunk stores per-chunk minimum/maximum values, so unprivileged
-- users must not be able to read it.
REVOKE SELECT ON TABLE columnar.chunk FROM PUBLIC;
|
|
@ -1,26 +0,0 @@
|
|||
-- columnar--10.2-2--10.2-3.sql
|
||||
|
||||
-- Since stripe_first_row_number_idx is required to scan a columnar table, we
|
||||
-- need to make sure that it is created before doing anything with columnar
|
||||
-- tables during pg upgrades.
|
||||
--
|
||||
-- However, a plain btree index is not a dependency of a table, so pg_upgrade
|
||||
-- cannot guarantee that stripe_first_row_number_idx gets created when
|
||||
-- creating columnar.stripe, unless we make it a unique "constraint".
|
||||
--
|
||||
-- To do that, drop stripe_first_row_number_idx and create a unique
|
||||
-- constraint with the same name to keep the code change at minimum.
|
||||
--
|
||||
-- If we have a pg_depend entry for this index, we can not drop it as
|
||||
-- the extension depends on it. Remove the pg_depend entry if it exists.
|
||||
-- Step 1: drop the pg_depend row (if any) that ties the columnar access method
-- to the plain index, since that row would block DROP INDEX.
DELETE FROM pg_depend
 WHERE classid = 'pg_am'::regclass::oid
   AND objid IN (select oid from pg_am where amname = 'columnar')
   AND objsubid = 0
   AND refclassid = 'pg_class'::regclass::oid
   AND refobjid = 'columnar.stripe_first_row_number_idx'::regclass::oid
   AND refobjsubid = 0
   AND deptype = 'n';

-- Step 2: replace the plain index with a unique constraint of the same name.
DROP INDEX columnar.stripe_first_row_number_idx;

ALTER TABLE columnar.stripe
    ADD CONSTRAINT stripe_first_row_number_idx
    UNIQUE (storage_id, first_row_number);
|
|
@ -1,5 +0,0 @@
|
|||
-- columnar--10.2-3--10.2-4.sql
|
||||
|
||||
#include "udfs/columnar_ensure_am_depends_catalog/10.2-4.sql"

-- Invoke the function right after the included file is processed.
-- NOTE(review): PERFORM is a PL/pgSQL statement, so this file is presumably
-- included inside a PL/pgSQL block -- confirm against the including script.
PERFORM citus_internal.columnar_ensure_am_depends_catalog();
|
|
@ -1 +0,0 @@
|
|||
-- no changes needed
|
|
@ -1,71 +0,0 @@
|
|||
#include "udfs/alter_columnar_table_set/11.1-1.sql"
#include "udfs/alter_columnar_table_reset/11.1-1.sql"

-- Turn the existing columnar schema into the locked-down columnar_internal
-- schema: strip PUBLIC's table privileges, rename, then strip PUBLIC's
-- privileges on the schema itself.
REVOKE ALL ON ALL TABLES IN SCHEMA columnar FROM PUBLIC;
ALTER SCHEMA columnar RENAME TO columnar_internal;
REVOKE ALL ON SCHEMA columnar_internal FROM PUBLIC;

-- Re-create columnar as a fresh schema that PUBLIC may use.
CREATE SCHEMA columnar;
GRANT USAGE ON SCHEMA columnar TO PUBLIC;

-- update UDF to account for columnar_internal schema
#include "udfs/columnar_ensure_am_depends_catalog/11.1-1.sql"
|
||||
|
||||
-- Utility function: C-language, STRICT, bound to the symbol
-- columnar_relation_storageid in the citus_columnar library.
CREATE FUNCTION columnar.get_storage_id(regclass)
    RETURNS bigint
    LANGUAGE C
    STRICT
    AS 'citus_columnar', 'columnar_relation_storageid';
|
||||
|
||||
-- create views for columnar table information

-- Maps each relation that uses the columnar access method to its storage id.
-- Only relations whose owner role the current user has USAGE on are listed.
CREATE VIEW columnar.storage WITH (security_barrier) AS
  SELECT c.oid::regclass AS relation,
         columnar.get_storage_id(c.oid) AS storage_id
    FROM pg_class c
    JOIN pg_am am ON am.oid = c.relam
   WHERE am.amname = 'columnar'
     AND pg_has_role(c.relowner, 'USAGE');

COMMENT ON VIEW columnar.storage IS 'Columnar relation ID to storage ID mapping.';

GRANT SELECT ON columnar.storage TO PUBLIC;
|
||||
|
||||
-- Exposes columnar_internal.options rows for relations whose owner role the
-- current user has USAGE on.
CREATE VIEW columnar.options WITH (security_barrier) AS
  SELECT o.regclass AS relation,
         o.chunk_group_row_limit,
         o.stripe_row_limit,
         o.compression,
         o.compression_level
    FROM columnar_internal.options o
    JOIN pg_class c ON c.oid = o.regclass
   WHERE pg_has_role(c.relowner, 'USAGE');

COMMENT ON VIEW columnar.options
  IS 'Columnar options for tables on which the current user has ownership privileges.';

GRANT SELECT ON columnar.options TO PUBLIC;
|
||||
|
||||
-- Stripe metadata restricted to the storages visible through columnar.storage.
CREATE VIEW columnar.stripe WITH (security_barrier) AS
  SELECT visible.relation,
         visible.storage_id,
         meta.stripe_num,
         meta.file_offset,
         meta.data_length,
         meta.column_count,
         meta.chunk_row_count,
         meta.row_count,
         meta.chunk_group_count,
         meta.first_row_number
    FROM columnar_internal.stripe meta
    JOIN columnar.storage visible ON meta.storage_id = visible.storage_id;

COMMENT ON VIEW columnar.stripe
  IS 'Columnar stripe information for tables on which the current user has ownership privileges.';

GRANT SELECT ON columnar.stripe TO PUBLIC;
|
||||
|
||||
-- Chunk group metadata restricted to the storages visible through
-- columnar.storage.
CREATE VIEW columnar.chunk_group WITH (security_barrier) AS
  SELECT visible.relation,
         visible.storage_id,
         meta.stripe_num,
         meta.chunk_group_num,
         meta.row_count
    FROM columnar_internal.chunk_group meta
    JOIN columnar.storage visible ON meta.storage_id = visible.storage_id;

COMMENT ON VIEW columnar.chunk_group
  IS 'Columnar chunk group information for tables on which the current user has ownership privileges.';

GRANT SELECT ON columnar.chunk_group TO PUBLIC;
|
||||
|
||||
-- Chunk metadata restricted to the storages visible through columnar.storage.
CREATE VIEW columnar.chunk WITH (security_barrier) AS
  SELECT visible.relation,
         visible.storage_id,
         meta.stripe_num,
         meta.attr_num,
         meta.chunk_group_num,
         meta.minimum_value,
         meta.maximum_value,
         meta.value_stream_offset,
         meta.value_stream_length,
         meta.exists_stream_offset,
         meta.exists_stream_length,
         meta.value_compression_type,
         meta.value_compression_level,
         meta.value_decompressed_length,
         meta.value_count
    FROM columnar_internal.chunk meta
    JOIN columnar.storage visible ON meta.storage_id = visible.storage_id;

COMMENT ON VIEW columnar.chunk
  IS 'Columnar chunk information for tables on which the current user has ownership privileges.';

GRANT SELECT ON columnar.chunk TO PUBLIC;
|