diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 376c44331..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,1128 +0,0 @@ -version: 2.1 -orbs: - codecov: codecov/codecov@1.1.1 - azure-cli: circleci/azure-cli@1.0.0 - -parameters: - image_suffix: - type: string - default: '-v9d71045' - pg14_version: - type: string - default: '14.9' - pg15_version: - type: string - default: '15.4' - pg16_version: - type: string - default: '16.0' - upgrade_pg_versions: - type: string - default: '14.9-15.4-16.0' - style_checker_tools_version: - type: string - default: '0.8.18' - flaky_test: - type: string - default: '' - flaky_test_runs_per_job: - type: integer - default: 50 - skip_flaky_tests: - type: boolean - default: false - -commands: - install_extension: - parameters: - pg_major: - description: 'postgres major version to use' - type: integer - steps: - - run: - name: 'Install Extension' - command: | - tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-<< parameters.pg_major >>.tar" --directory / - - configure: - steps: - - run: - name: 'Configure' - command: | - chown -R circleci . - gosu circleci ./configure --without-pg-version-check - - enable_core: - steps: - - run: - name: 'Enable core dumps' - command: | - ulimit -c unlimited - - save_regressions: - steps: - - run: - name: 'Regressions' - command: | - if [ -f "src/test/regress/regression.diffs" ]; then - cat src/test/regress/regression.diffs - exit 1 - fi - when: on_fail - - store_artifacts: - name: 'Save regressions' - path: src/test/regress/regression.diffs - - save_logs_and_results: - steps: - - store_artifacts: - name: 'Save mitmproxy output (failure test specific)' - path: src/test/regress/proxy.output - - store_artifacts: - name: 'Save results' - path: src/test/regress/results/ - - store_artifacts: - name: 'Save coordinator log' - path: src/test/regress/tmp_check/master/log - - store_artifacts: - name: 'Save worker1 log' - path: src/test/regress/tmp_check/worker.57637/log - - store_artifacts: - name: 'Save worker2 log' - path: src/test/regress/tmp_check/worker.57638/log - - stack_trace: - steps: - - run: - name: 'Print stack traces' - command: | - ./ci/print_stack_trace.sh - when: on_fail - - coverage: - parameters: - flags: - description: 'codecov flags' - type: string - steps: - - codecov/upload: - flags: '<< parameters.flags >>' - - run: - name: 'Create codeclimate coverage' - command: | - lcov --directory . --capture --output-file lcov.info - lcov --remove lcov.info -o lcov.info '/usr/*' - sed "s=^SF:$PWD/=SF:=g" -i lcov.info # relative pats are required by codeclimate - mkdir -p /tmp/codeclimate - # We started getting permissions error. This fixes them and since - # weqre not on a multi-user system so this is safe to do. - git config --global --add safe.directory /home/circleci/project - cc-test-reporter format-coverage -t lcov -o /tmp/codeclimate/$CIRCLE_JOB.json lcov.info - - persist_to_workspace: - root: /tmp - paths: - - codeclimate/*.json - -jobs: - build: - description: Build the citus extension - parameters: - pg_major: - description: postgres major version building citus for - type: integer - image: - description: docker image to use for the build - type: string - default: citus/extbuilder - image_tag: - description: tag to use for the docker image - type: string - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - steps: - - checkout - - run: - name: 'Configure, Build, and Install' - command: | - ./ci/build-citus.sh - - persist_to_workspace: - root: . - paths: - - build-<< parameters.pg_major >>/* - - install-<>.tar - - check-style: - docker: - - image: 'citus/stylechecker:<< pipeline.parameters.style_checker_tools_version >><< pipeline.parameters.image_suffix >>' - steps: - - checkout - - run: - name: 'Check C Style' - command: citus_indent --check - - run: - name: 'Check Python style' - command: black --check . - - run: - name: 'Check Python import order' - command: isort --check . - - run: - name: 'Check Python lints' - command: flake8 . - - run: - name: 'Fix whitespace' - command: ci/editorconfig.sh && git diff --exit-code - - run: - name: 'Remove useless declarations' - command: ci/remove_useless_declarations.sh && git diff --cached --exit-code - - run: - name: 'Normalize test output' - command: ci/normalize_expected.sh && git diff --exit-code - - run: - name: 'Check for C-style comments in migration files' - command: ci/disallow_c_comments_in_migrations.sh && git diff --exit-code - - run: - name: 'Check for comment--cached ns that start with # character in spec files' - command: ci/disallow_hash_comments_in_spec_files.sh && git diff --exit-code - - run: - name: 'Check for gitignore entries .for source files' - command: ci/fix_gitignore.sh && git diff --exit-code - - run: - name: 'Check for lengths of changelog entries' - command: ci/disallow_long_changelog_entries.sh - - run: - name: 'Check for banned C API usage' - command: ci/banned.h.sh - - run: - name: 'Check for tests missing in schedules' - command: ci/check_all_tests_are_run.sh - - run: - name: 'Check if all CI scripts are actually run' - command: ci/check_all_ci_scripts_are_run.sh - - run: - name: 'Check if all GUCs are sorted alphabetically' - command: ci/check_gucs_are_alphabetically_sorted.sh - - run: - name: 'Check for missing downgrade scripts' - command: ci/check_migration_files.sh - - check-sql-snapshots: - docker: - - image: 'citus/extbuilder:latest' - steps: - - checkout - - run: - name: 'Check Snapshots' - command: ci/check_sql_snapshots.sh - - test-pg-upgrade: - description: Runs postgres upgrade tests - parameters: - old_pg_major: - description: 'postgres major version to use before the upgrade' - type: integer - new_pg_major: - description: 'postgres major version to upgrade to' - type: integer - image: - description: 'docker image to use as for the tests' - type: string - default: citus/pgupgradetester - image_tag: - description: 'docker image tag to use' - type: string - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - install_extension: - pg_major: << parameters.old_pg_major >> - - install_extension: - pg_major: << parameters.new_pg_major >> - - configure - - enable_core - - run: - name: 'Install and test postgres upgrade' - command: | - gosu circleci \ - make -C src/test/regress \ - check-pg-upgrade \ - old-bindir=/usr/lib/postgresql/<< parameters.old_pg_major >>/bin \ - new-bindir=/usr/lib/postgresql/<< parameters.new_pg_major >>/bin - no_output_timeout: 2m - - run: - name: 'Copy pg_upgrade logs for newData dir' - command: | - mkdir -p /tmp/pg_upgrade_newData_logs - if ls src/test/regress/tmp_upgrade/newData/*.log 1> /dev/null 2>&1; then - cp src/test/regress/tmp_upgrade/newData/*.log /tmp/pg_upgrade_newData_logs - fi - when: on_fail - - store_artifacts: - name: 'Save pg_upgrade logs for newData dir' - path: /tmp/pg_upgrade_newData_logs - - save_logs_and_results - - save_regressions - - stack_trace - - coverage: - flags: 'test_<< parameters.old_pg_major >>_<< parameters.new_pg_major >>,upgrade' - - test-pytest: - description: Runs pytest based tests - parameters: - pg_major: - description: 'postgres major version' - type: integer - image: - description: 'docker image to use as for the tests' - type: string - default: citus/failtester - image_tag: - description: 'docker image tag to use' - type: string - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - install_extension: - pg_major: << parameters.pg_major >> - - configure - - enable_core - - run: - name: 'Run pytest' - command: | - gosu circleci \ - make -C src/test/regress check-pytest - no_output_timeout: 2m - - stack_trace - - coverage: - flags: 'test_<< parameters.pg_major >>,pytest' - - - test-arbitrary-configs: - description: Runs tests on arbitrary configs - parallelism: 6 - parameters: - pg_major: - description: 'postgres major version to use' - type: integer - image: - description: 'docker image to use as for the tests' - type: string - default: citus/failtester - image_tag: - description: 'docker image tag to use' - type: string - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - resource_class: xlarge - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - install_extension: - pg_major: << parameters.pg_major >> - - configure - - enable_core - - run: - name: 'Test arbitrary configs' - command: | - TESTS=$(src/test/regress/citus_tests/print_test_names.py | circleci tests split) - # Our test suite expects comma separated values - TESTS=$(echo $TESTS | tr ' ' ',') - # TESTS will contain subset of configs that will be run on a container and we use multiple containers - # to run the test suite - gosu circleci \ - make -C src/test/regress \ - check-arbitrary-configs parallel=4 CONFIGS=$TESTS - no_output_timeout: 2m - - run: - name: 'Show regressions' - command: | - find src/test/regress/tmp_citus_test/ -name "regression*.diffs" -exec cat {} + - lines=$(find src/test/regress/tmp_citus_test/ -name "regression*.diffs" | wc -l) - if [ $lines -ne 0 ]; then - exit 1 - fi - - when: on_fail - - run: - name: 'Copy logfiles' - command: | - mkdir src/test/regress/tmp_citus_test/logfiles - find src/test/regress/tmp_citus_test/ -name "logfile_*" -exec cp -t src/test/regress/tmp_citus_test/logfiles/ {} + - when: on_fail - - store_artifacts: - name: 'Save logfiles' - path: src/test/regress/tmp_citus_test/logfiles - - save_logs_and_results - - stack_trace - - coverage: - flags: 'test_<< parameters.pg_major >>,upgrade' - - test-citus-upgrade: - description: Runs citus upgrade tests - parameters: - pg_major: - description: 'postgres major version' - type: integer - image: - description: 'docker image to use as for the tests' - type: string - default: citus/citusupgradetester - image_tag: - description: 'docker image tag to use' - type: string - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - configure - - enable_core - - run: - name: 'Install and test citus upgrade' - command: | - # run make check-citus-upgrade for all citus versions - # the image has ${CITUS_VERSIONS} set with all verions it contains the binaries of - for citus_version in ${CITUS_VERSIONS}; do \ - gosu circleci \ - make -C src/test/regress \ - check-citus-upgrade \ - bindir=/usr/lib/postgresql/${PG_MAJOR}/bin \ - citus-old-version=${citus_version} \ - citus-pre-tar=/install-pg${PG_MAJOR}-citus${citus_version}.tar \ - citus-post-tar=/home/circleci/project/install-$PG_MAJOR.tar; \ - done; - - # run make check-citus-upgrade-mixed for all citus versions - # the image has ${CITUS_VERSIONS} set with all verions it contains the binaries of - for citus_version in ${CITUS_VERSIONS}; do \ - gosu circleci \ - make -C src/test/regress \ - check-citus-upgrade-mixed \ - citus-old-version=${citus_version} \ - bindir=/usr/lib/postgresql/${PG_MAJOR}/bin \ - citus-pre-tar=/install-pg${PG_MAJOR}-citus${citus_version}.tar \ - citus-post-tar=/home/circleci/project/install-$PG_MAJOR.tar; \ - done; - no_output_timeout: 2m - - save_logs_and_results - - save_regressions - - stack_trace - - coverage: - flags: 'test_<< parameters.pg_major >>,upgrade' - - test-query-generator: - description: Expects that the generated queries that are run on distributed and local tables would have the same results - parameters: - pg_major: - description: 'postgres major version' - type: integer - image: - description: 'docker image to use as for the tests' - type: string - default: citus/failtester - image_tag: - description: 'docker image tag to use' - type: string - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - install_extension: - pg_major: << parameters.pg_major >> - - configure - - enable_core - - run: - name: 'Run Test' - command: | - gosu circleci make -C src/test/regress check-query-generator - no_output_timeout: 5m - - run: - name: 'Show regressions' - command: | - find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" -exec cat {} + - lines=$(find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" | wc -l) - if [ $lines -ne 0 ]; then - exit 1 - fi - when: on_fail - - run: - name: 'Copy logfiles' - command: | - mkdir src/test/regress/tmp_citus_test/logfiles - find src/test/regress/tmp_citus_test/ -name "logfile_*" -exec cp -t src/test/regress/tmp_citus_test/logfiles/ {} + - when: on_fail - - store_artifacts: - name: 'Save logfiles' - path: src/test/regress/tmp_citus_test/logfiles - - store_artifacts: - name: 'Save ddls' - path: src/test/regress/citus_tests/query_generator/out/ddls.sql - - store_artifacts: - name: 'Save dmls' - path: src/test/regress/citus_tests/query_generator/out/queries.sql - - store_artifacts: - name: 'Save diffs' - path: src/test/regress/citus_tests/query_generator/out/local_dist.diffs - - stack_trace - - coverage: - flags: 'test_<< parameters.pg_major >>,querygen' - - test-citus: - description: Runs the common tests of citus - parameters: - pg_major: - description: 'postgres major version' - type: integer - image: - description: 'docker image to use as for the tests' - type: string - default: citus/exttester - image_tag: - description: 'docker image tag to use' - type: string - make: - description: 'make target' - type: string - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - install_extension: - pg_major: << parameters.pg_major >> - - configure - - enable_core - - run: - name: 'Run Test' - command: | - gosu circleci make -C src/test/regress << parameters.make >> - no_output_timeout: 2m - - save_logs_and_results - - save_regressions - - stack_trace - - coverage: - flags: 'test_<< parameters.pg_major >>,<< parameters.make >>' - - tap-test-citus: - description: Runs tap tests for citus - parameters: - pg_major: - description: 'postgres major version' - type: integer - image: - description: 'docker image to use as for the tests' - type: string - default: citus/exttester - image_tag: - description: 'docker image tag to use' - type: string - suite: - description: 'name of the tap test suite to run' - type: string - make: - description: 'make target' - type: string - default: installcheck - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - install_extension: - pg_major: << parameters.pg_major >> - - configure - - enable_core - - run: - name: 'Run Test' - command: | - gosu circleci make -C src/test/<< parameters.suite >> << parameters.make >> - no_output_timeout: 2m - - store_artifacts: - name: 'Save tap logs' - path: /home/circleci/project/src/test/<< parameters.suite >>/tmp_check/log - - save_logs_and_results - - stack_trace - - coverage: - flags: 'test_<< parameters.pg_major >>,tap_<< parameters.suite >>_<< parameters.make >>' - - check-merge-to-enterprise: - docker: - - image: citus/extbuilder:<< pipeline.parameters.pg14_version >> - working_directory: /home/circleci/project - steps: - - checkout - - run: - command: | - ci/check_enterprise_merge.sh - - ch_benchmark: - docker: - - image: buildpack-deps:stretch - working_directory: /home/circleci/project - steps: - - checkout - - azure-cli/install - - azure-cli/login-with-service-principal - - run: - command: | - cd ./src/test/hammerdb - sh run_hammerdb.sh citusbot_ch_benchmark_rg - name: install dependencies and run ch_benchmark tests - no_output_timeout: 20m - - tpcc_benchmark: - docker: - - image: buildpack-deps:stretch - working_directory: /home/circleci/project - steps: - - checkout - - azure-cli/install - - azure-cli/login-with-service-principal - - run: - command: | - cd ./src/test/hammerdb - sh run_hammerdb.sh citusbot_tpcc_benchmark_rg - name: install dependencies and run ch_benchmark tests - no_output_timeout: 20m - - test-flakyness: - description: Runs a test multiple times to see if it's flaky - parallelism: 32 - parameters: - pg_major: - description: 'postgres major version' - type: integer - image: - description: 'docker image to use as for the tests' - type: string - default: citus/failtester - image_tag: - description: 'docker image tag to use' - type: string - test: - description: 'the test file path that should be run multiple times' - type: string - default: '' - runs: - description: 'number of times that the test should be run in total' - type: integer - default: 8 - skip: - description: 'A flag to bypass flaky test detection.' - type: boolean - default: false - docker: - - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' - working_directory: /home/circleci/project - resource_class: small - steps: - - checkout - - attach_workspace: - at: . - - run: - name: 'Detect regression tests need to be ran' - command: | - skip=<< parameters.skip >> - if [ "$skip" = true ]; then - echo "Skipping flaky test detection." - circleci-agent step halt - fi - - testForDebugging="<< parameters.test >>" - - if [ -z "$testForDebugging" ]; then - detected_changes=$(git diff origin/main... --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*\.sql\|src/test/regress/spec/.*\.spec\|src/test/regress/citus_tests/test/test_.*\.py' || true)) - tests=${detected_changes} - else - tests=$testForDebugging; - fi - - if [ -z "$tests" ]; then - echo "No test found." - circleci-agent step halt - else - echo "Detected tests " $tests - fi - - echo export tests=\""$tests"\" >> "$BASH_ENV" - source "$BASH_ENV" - - install_extension: - pg_major: << parameters.pg_major >> - - configure - - enable_core - - run: - name: 'Run minimal tests' - command: | - tests_array=($tests) - for test in "${tests_array[@]}" - do - test_name=$(echo "$test" | sed -r "s/.+\/(.+)\..+/\1/") - gosu circleci src/test/regress/citus_tests/run_test.py $test_name --repeat << parameters.runs >> --use-base-schedule --use-whole-schedule-line - done - no_output_timeout: 2m - - save_logs_and_results - - save_regressions - - stack_trace - - upload-coverage: - docker: - - image: 'citus/exttester:<< pipeline.parameters.pg15_version >><< pipeline.parameters.image_suffix >>' - working_directory: /home/circleci/project - steps: - - attach_workspace: - at: . - - run: - name: Upload coverage results to Code Climate - command: | - cc-test-reporter sum-coverage codeclimate/*.json -o total.json - cc-test-reporter upload-coverage -i total.json - -workflows: - version: 2 - flaky_test_debugging: - jobs: - - build: - name: build-flaky-15 - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - - - test-flakyness: - name: 'test-15_flaky' - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - requires: [build-flaky-15] - test: '<< pipeline.parameters.flaky_test >>' - runs: << pipeline.parameters.flaky_test_runs_per_job >> - - build_and_test: - jobs: - - build: - name: build-14 - pg_major: 14 - image_tag: '<< pipeline.parameters.pg14_version >>' - - build: - name: build-15 - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - - build: - name: build-16 - pg_major: 16 - image_tag: '<< pipeline.parameters.pg16_version >>' - - - check-style - - check-sql-snapshots - - - test-citus: &test-citus-14 - name: 'test-14_check-split' - make: check-split - pg_major: 14 - image_tag: '<< pipeline.parameters.pg14_version >>' - requires: [build-14] - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-enterprise' - make: check-enterprise - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-enterprise-isolation' - make: check-enterprise-isolation - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-enterprise-isolation-logicalrep-1' - make: check-enterprise-isolation-logicalrep-1 - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-enterprise-isolation-logicalrep-2' - make: check-enterprise-isolation-logicalrep-2 - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-enterprise-isolation-logicalrep-3' - make: check-enterprise-isolation-logicalrep-3 - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-enterprise-failure' - image: citus/failtester - make: check-enterprise-failure - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-multi' - make: check-multi - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-multi-1' - make: check-multi-1 - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-mx' - make: check-multi-mx - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-vanilla' - make: check-vanilla - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-isolation' - make: check-isolation - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-operations' - make: check-operations - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-follower-cluster' - make: check-follower-cluster - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-columnar' - make: check-columnar - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-columnar-isolation' - make: check-columnar-isolation - - test-citus: - <<: *test-citus-14 - name: 'test-14_check-failure' - image: citus/failtester - make: check-failure - - - test-citus: &test-citus-15 - name: 'test-15_check-split' - make: check-split - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - requires: [build-15] - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-enterprise' - make: check-enterprise - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-enterprise-isolation' - make: check-enterprise-isolation - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-enterprise-isolation-logicalrep-1' - make: check-enterprise-isolation-logicalrep-1 - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-enterprise-isolation-logicalrep-2' - make: check-enterprise-isolation-logicalrep-2 - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-enterprise-isolation-logicalrep-3' - make: check-enterprise-isolation-logicalrep-3 - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-enterprise-failure' - image: citus/failtester - make: check-enterprise-failure - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-multi' - make: check-multi - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-multi-1' - make: check-multi-1 - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-mx' - make: check-multi-mx - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-vanilla' - make: check-vanilla - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-isolation' - make: check-isolation - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-operations' - make: check-operations - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-follower-cluster' - make: check-follower-cluster - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-columnar' - make: check-columnar - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-columnar-isolation' - make: check-columnar-isolation - - test-citus: - <<: *test-citus-15 - name: 'test-15_check-failure' - image: citus/failtester - make: check-failure - - - test-citus: &test-citus-16 - name: 'test-16_check-split' - make: check-split - pg_major: 16 - image_tag: '<< pipeline.parameters.pg16_version >>' - requires: [build-16] - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-enterprise' - make: check-enterprise - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-enterprise-isolation' - make: check-enterprise-isolation - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-enterprise-isolation-logicalrep-1' - make: check-enterprise-isolation-logicalrep-1 - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-enterprise-isolation-logicalrep-2' - make: check-enterprise-isolation-logicalrep-2 - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-enterprise-isolation-logicalrep-3' - make: check-enterprise-isolation-logicalrep-3 - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-enterprise-failure' - image: citus/failtester - make: check-enterprise-failure - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-multi' - make: check-multi - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-multi-1' - make: check-multi-1 - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-mx' - make: check-multi-mx - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-vanilla' - make: check-vanilla - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-isolation' - make: check-isolation - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-operations' - make: check-operations - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-follower-cluster' - make: check-follower-cluster - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-columnar' - make: check-columnar - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-columnar-isolation' - make: check-columnar-isolation - - test-citus: - <<: *test-citus-16 - name: 'test-16_check-failure' - image: citus/failtester - make: check-failure - - - test-pytest: - name: 'test-14_pytest' - pg_major: 14 - image_tag: '<< pipeline.parameters.pg14_version >>' - requires: [build-14] - - - test-pytest: - name: 'test-15_pytest' - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - requires: [build-15] - - - test-pytest: - name: 'test-16_pytest' - pg_major: 16 - image_tag: '<< pipeline.parameters.pg16_version >>' - requires: [build-16] - - - tap-test-citus: - name: 'test-15_tap-cdc' - suite: cdc - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - requires: [build-15] - - - tap-test-citus: - name: 'test-16_tap-cdc' - suite: cdc - pg_major: 16 - image_tag: '<< pipeline.parameters.pg16_version >>' - requires: [build-16] - - - test-arbitrary-configs: - name: 'test-14_check-arbitrary-configs' - pg_major: 14 - image_tag: '<< pipeline.parameters.pg14_version >>' - requires: [build-14] - - - test-arbitrary-configs: - name: 'test-15_check-arbitrary-configs' - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - requires: [build-15] - - - test-arbitrary-configs: - name: 'test-16_check-arbitrary-configs' - pg_major: 16 - image_tag: '<< pipeline.parameters.pg16_version >>' - requires: [build-16] - - - test-query-generator: - name: 'test-14_check-query-generator' - pg_major: 14 - image_tag: '<< pipeline.parameters.pg14_version >>' - requires: [build-14] - - - test-query-generator: - name: 'test-15_check-query-generator' - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - requires: [build-15] - - - test-query-generator: - name: 'test-16_check-query-generator' - pg_major: 16 - image_tag: '<< pipeline.parameters.pg16_version >>' - requires: [build-16] - - - test-pg-upgrade: - name: 'test-14-15_check-pg-upgrade' - old_pg_major: 14 - new_pg_major: 15 - image_tag: '<< pipeline.parameters.upgrade_pg_versions >>' - requires: [build-14, build-15] - - - test-pg-upgrade: - name: 'test-15-16_check-pg-upgrade' - old_pg_major: 15 - new_pg_major: 16 - image_tag: '<< pipeline.parameters.upgrade_pg_versions >>' - requires: [build-15, build-16] - - - test-pg-upgrade: - name: 'test-14-16_check-pg-upgrade' - old_pg_major: 14 - new_pg_major: 16 - image_tag: '<< pipeline.parameters.upgrade_pg_versions >>' - requires: [build-14, build-16] - - - test-citus-upgrade: - name: test-14_check-citus-upgrade - pg_major: 14 - image_tag: '<< pipeline.parameters.pg14_version >>' - requires: [build-14] - - - upload-coverage: - requires: - - test-14_check-multi - - test-14_check-multi-1 - - test-14_check-mx - - test-14_check-vanilla - - test-14_check-isolation - - test-14_check-operations - - test-14_check-follower-cluster - - test-14_check-columnar - - test-14_check-columnar-isolation - - test-14_check-failure - - test-14_check-enterprise - - test-14_check-enterprise-isolation - - test-14_check-enterprise-isolation-logicalrep-1 - - test-14_check-enterprise-isolation-logicalrep-2 - - test-14_check-enterprise-isolation-logicalrep-3 - - test-14_check-enterprise-failure - - test-14_check-split - - test-14_check-arbitrary-configs - - test-14_check-query-generator - - test-15_check-multi - - test-15_check-multi-1 - - test-15_check-mx - - test-15_check-vanilla - - test-15_check-isolation - - test-15_check-operations - - test-15_check-follower-cluster - - test-15_check-columnar - - test-15_check-columnar-isolation - - test-15_check-failure - - test-15_check-enterprise - - test-15_check-enterprise-isolation - - test-15_check-enterprise-isolation-logicalrep-1 - - test-15_check-enterprise-isolation-logicalrep-2 - - test-15_check-enterprise-isolation-logicalrep-3 - - test-15_check-enterprise-failure - - test-15_check-split - - test-15_check-arbitrary-configs - - test-15_check-query-generator - - test-16_check-multi - - test-16_check-multi-1 - - test-16_check-mx - - test-16_check-vanilla - - test-16_check-isolation - - test-16_check-operations - - test-16_check-follower-cluster - - test-16_check-columnar - - test-16_check-columnar-isolation - - test-16_check-failure - - test-16_check-enterprise - - test-16_check-enterprise-isolation - - test-16_check-enterprise-isolation-logicalrep-1 - - test-16_check-enterprise-isolation-logicalrep-2 - - test-16_check-enterprise-isolation-logicalrep-3 - - test-16_check-enterprise-failure - - test-16_check-split - - test-16_check-arbitrary-configs - - test-16_check-query-generator - - test-14-15_check-pg-upgrade - - test-15-16_check-pg-upgrade - - test-14-16_check-pg-upgrade - - test-14_check-citus-upgrade - - - ch_benchmark: - requires: [build-14] - filters: - branches: - only: - - /ch_benchmark\/.*/ # match with ch_benchmark/ prefix - - tpcc_benchmark: - requires: [build-14] - filters: - branches: - only: - - /tpcc_benchmark\/.*/ # match with tpcc_benchmark/ prefix - - test-flakyness: - name: 'test-15_flaky' - pg_major: 15 - image_tag: '<< pipeline.parameters.pg15_version >>' - requires: [build-15] - skip: << pipeline.parameters.skip_flaky_tests >> diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1f22ff034..d285e4f50 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -27,9 +27,9 @@ jobs: style_checker_image_name: "citus/stylechecker" style_checker_tools_version: "0.8.18" image_suffix: "-v9d71045" - pg14_version: "14.9" - pg15_version: "15.4" - pg16_version: "16.0" + pg14_version: '{ "major": "14", "full": "14.9" }' + pg15_version: '{ "major": "15", "full": "15.4" }' + pg16_version: '{ "major": "16", "full": "16.0" }' upgrade_pg_versions: "14.9-15.4-16.0" steps: # Since GHA jobs needs at least one step we use a noop step here. @@ -93,7 +93,7 @@ jobs: run: ci/check_migration_files.sh build: needs: params - name: Build for PG ${{ matrix.pg_version}} + name: Build for PG${{ fromJson(matrix.pg_version).major }} strategy: fail-fast: false matrix: @@ -107,7 +107,7 @@ jobs: - ${{ needs.params.outputs.pg16_version }} runs-on: ubuntu-20.04 container: - image: "${{ matrix.image_name }}:${{ matrix.pg_version }}${{ matrix.image_suffix }}" + image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ matrix.image_suffix }}" options: --user root steps: - uses: actions/checkout@v3.5.0 @@ -124,7 +124,7 @@ jobs: ./build-${{ env.PG_MAJOR }}/* ./install-${{ env.PG_MAJOR }}.tar test-citus: - name: PG${{ matrix.pg_version }} - ${{ matrix.make }} + name: PG${{ fromJson(matrix.pg_version).major }} - ${{ matrix.make }} strategy: fail-fast: false matrix: @@ -211,7 +211,7 @@ jobs: image_name: ${{ needs.params.outputs.fail_test_image_name }} runs-on: ubuntu-20.04 container: - image: "${{ matrix.image_name }}:${{ matrix.pg_version }}${{ needs.params.outputs.image_suffix }}" + image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ needs.params.outputs.image_suffix }}" options: --user root --dns=8.8.8.8 # Due to Github creates a default network for each job, we need to use # --dns= to have similar DNS settings as our other CI systems or local @@ -228,17 +228,17 @@ jobs: - uses: "./.github/actions/save_logs_and_results" if: always() with: - folder: ${{ matrix.pg_version }}_${{ matrix.make }} + folder: ${{ fromJson(matrix.pg_version).major }}_${{ matrix.make }} - uses: "./.github/actions/upload_coverage" if: always() with: flags: ${{ env.PG_MAJOR }}_${{ matrix.suite }}_${{ matrix.make }} codecov_token: ${{ secrets.CODECOV_TOKEN }} test-arbitrary-configs: - name: PG${{ matrix.pg_version }} - check-arbitrary-configs-${{ matrix.parallel }} + name: PG${{ fromJson(matrix.pg_version).major }} - check-arbitrary-configs-${{ matrix.parallel }} runs-on: ["self-hosted", "1ES.Pool=1es-gha-citusdata-pool"] container: - image: "${{ matrix.image_name }}:${{ matrix.pg_version }}${{ needs.params.outputs.image_suffix }}" + image: "${{ matrix.image_name }}:${{ fromJson(matrix.pg_version).full }}${{ needs.params.outputs.image_suffix }}" options: --user root needs: - params @@ -333,10 +333,10 @@ jobs: flags: ${{ env.old_pg_major }}_${{ env.new_pg_major }}_upgrade codecov_token: ${{ secrets.CODECOV_TOKEN }} test-citus-upgrade: - name: PG${{ needs.params.outputs.pg14_version }} - check-citus-upgrade + name: PG${{ fromJson(needs.params.outputs.pg14_version).major }} - check-citus-upgrade runs-on: ubuntu-20.04 container: - image: "${{ needs.params.outputs.citusupgrade_image_name }}:${{ needs.params.outputs.pg14_version }}${{ needs.params.outputs.image_suffix }}" + image: "${{ needs.params.outputs.citusupgrade_image_name }}:${{ fromJson(needs.params.outputs.pg14_version).full }}${{ needs.params.outputs.image_suffix }}" options: --user root needs: - params @@ -383,7 +383,7 @@ jobs: CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} runs-on: ubuntu-20.04 container: - image: ${{ needs.params.outputs.test_image_name }}:${{ needs.params.outputs.pg16_version }}${{ needs.params.outputs.image_suffix }} + image: ${{ needs.params.outputs.test_image_name }}:${{ fromJson(needs.params.outputs.pg16_version).full }}${{ needs.params.outputs.image_suffix }} needs: - params - test-citus @@ -478,7 +478,7 @@ jobs: name: Test flakyness runs-on: ubuntu-20.04 container: - image: ${{ needs.params.outputs.fail_test_image_name }}:${{ needs.params.outputs.pg16_version }}${{ needs.params.outputs.image_suffix }} + image: ${{ needs.params.outputs.fail_test_image_name }}:${{ fromJson(needs.params.outputs.pg16_version).full }}${{ needs.params.outputs.image_suffix }} options: --user root env: runs: 8 diff --git a/.github/workflows/packaging-test-pipelines.yml b/.github/workflows/packaging-test-pipelines.yml index 0fb4b7092..8690fce1f 100644 --- a/.github/workflows/packaging-test-pipelines.yml +++ b/.github/workflows/packaging-test-pipelines.yml @@ -24,9 +24,11 @@ jobs: - name: Get Postgres Versions id: get-postgres-versions run: | - # Postgres versions are stored in .github/workflows/build_and_test.yml file in "pg[pg-version]_version" - # format. Below command extracts the versions and get the unique values. - pg_versions=$(cat .github/workflows/build_and_test.yml | grep -oE 'pg[0-9]+_version: "[0-9.]+"' | sed -E 's/pg([0-9]+)_version: "([0-9.]+)"/\1/g' | sort | uniq | tr '\n', ',') + set -euxo pipefail + # Postgres versions are stored in .github/workflows/build_and_test.yml + # file in json strings with major and full keys. + # Below command extracts the versions and get the unique values. + pg_versions=$(cat .github/workflows/build_and_test.yml | grep -oE '"major": "[0-9]+", "full": "[0-9.]+"' | sed -E 's/"major": "([0-9]+)", "full": "([0-9.]+)"/\1/g' | sort | uniq | tr '\n', ',') pg_versions_array="[ ${pg_versions} ]" echo "Supported PG Versions: ${pg_versions_array}" # Below line is needed to set the output variable to be used in the next job diff --git a/ci/check_all_ci_scripts_are_run.sh b/ci/check_all_ci_scripts_are_run.sh index 0b7abb3e3..12516f793 100755 --- a/ci/check_all_ci_scripts_are_run.sh +++ b/ci/check_all_ci_scripts_are_run.sh @@ -14,8 +14,8 @@ ci_scripts=$( grep -v -E '^(ci_helpers.sh|fix_style.sh)$' ) for script in $ci_scripts; do - if ! grep "\\bci/$script\\b" .circleci/config.yml > /dev/null; then - echo "ERROR: CI script with name \"$script\" is not actually used in .circleci/config.yml" + if ! grep "\\bci/$script\\b" -r .github > /dev/null; then + echo "ERROR: CI script with name \"$script\" is not actually used in .github folder" exit 1 fi if ! grep "^## \`$script\`\$" ci/README.md > /dev/null; then diff --git a/ci/check_enterprise_merge.sh b/ci/check_enterprise_merge.sh deleted file mode 100755 index d29ffcad8..000000000 --- a/ci/check_enterprise_merge.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash - -# Testing this script locally requires you to set the following environment -# variables: -# CIRCLE_BRANCH, GIT_USERNAME and GIT_TOKEN - -# fail if trying to reference a variable that is not set. -set -u -# exit immediately if a command fails -set -e -# Fail on pipe failures -set -o pipefail - -PR_BRANCH="${CIRCLE_BRANCH}" -ENTERPRISE_REMOTE="https://${GIT_USERNAME}:${GIT_TOKEN}@github.com/citusdata/citus-enterprise" - -# shellcheck disable=SC1091 -source ci/ci_helpers.sh - -# List executed commands. This is done so debugging this script is easier when -# it fails. It's explicitly done after git remote add so username and password -# are not shown in CI output (even though it's also filtered out by CircleCI) -set -x - -check_compile () { - echo "INFO: checking if merged code can be compiled" - ./configure --without-libcurl - make -j10 -} - -# Clone current git repo (which should be community) to a temporary working -# directory and go there -GIT_DIR_ROOT="$(git rev-parse --show-toplevel)" -TMP_GIT_DIR="$(mktemp --directory -t citus-merge-check.XXXXXXXXX)" -git clone "$GIT_DIR_ROOT" "$TMP_GIT_DIR" -cd "$TMP_GIT_DIR" - -# Fails in CI without this -git config user.email "citus-bot@microsoft.com" -git config user.name "citus bot" - -# Disable "set -x" temporarily, because $ENTERPRISE_REMOTE contains passwords -{ set +x ; } 2> /dev/null -git remote add enterprise "$ENTERPRISE_REMOTE" -set -x - -git remote set-url --push enterprise no-pushing - -# Fetch enterprise-master -git fetch enterprise enterprise-master - - -git checkout "enterprise/enterprise-master" - -if git merge --no-commit "origin/$PR_BRANCH"; then - echo "INFO: community PR branch could be merged into enterprise-master" - # check that we can compile after the merge - if check_compile; then - exit 0 - fi - - echo "WARN: Failed to compile after community PR branch was merged into enterprise" -fi - -# undo partial merge -git merge --abort - -# If we have a conflict on enterprise merge on the master branch, we have a problem. -# Provide an error message to indicate that enterprise merge is needed to fix this check. -if [[ $PR_BRANCH = master ]]; then - echo "ERROR: Master branch has merge conflicts with enterprise-master." - echo "Try re-running this CI job after merging your changes into enterprise-master." - exit 1 -fi - -if ! git fetch enterprise "$PR_BRANCH" ; then - echo "ERROR: enterprise/$PR_BRANCH was not found and community PR branch could not be merged into enterprise-master" - exit 1 -fi - -# Show the top commit of the enterprise PR branch to make debugging easier -git log -n 1 "enterprise/$PR_BRANCH" - -# Check that this branch contains the top commit of the current community PR -# branch. If it does not it means it's not up to date with the current PR, so -# the enterprise branch should be updated. -if ! git merge-base --is-ancestor "origin/$PR_BRANCH" "enterprise/$PR_BRANCH" ; then - echo "ERROR: enterprise/$PR_BRANCH is not up to date with community PR branch" - exit 1 -fi - -# Now check if we can merge the enterprise PR into enterprise-master without -# issues. -git merge --no-commit "enterprise/$PR_BRANCH" -# check that we can compile after the merge -check_compile diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 1ac20c8bc..9b5768ee7 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -481,6 +481,7 @@ _PG_init(void) #endif InitializeMaintenanceDaemon(); + InitializeMaintenanceDaemonForMainDb(); /* initialize coordinated transaction management */ InitializeTransactionManagement(); @@ -1820,6 +1821,16 @@ RegisterCitusConfigVariables(void) GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE | GUC_UNIT_MS, NULL, NULL, NULL); + DefineCustomStringVariable( + "citus.main_db", + gettext_noop("Which database is designated as the main_db"), + NULL, + &MainDb, + "", + PGC_POSTMASTER, + GUC_STANDARD, + NULL, NULL, NULL); + DefineCustomIntVariable( "citus.max_adaptive_executor_pool_size", gettext_noop("Sets the maximum number of connections per worker node used by " diff --git a/src/backend/distributed/utils/maintenanced.c b/src/backend/distributed/utils/maintenanced.c index 5f49de20a..22a0843bd 100644 --- a/src/backend/distributed/utils/maintenanced.c +++ b/src/backend/distributed/utils/maintenanced.c @@ -99,6 +99,7 @@ int Recover2PCInterval = 60000; int DeferShardDeleteInterval = 15000; int BackgroundTaskQueueCheckInterval = 5000; int MaxBackgroundTaskExecutors = 4; +char *MainDb = ""; /* config variables for metadata sync timeout */ int MetadataSyncInterval = 60000; @@ -112,7 +113,7 @@ static MaintenanceDaemonControlData *MaintenanceDaemonControl = NULL; * activated. */ static HTAB *MaintenanceDaemonDBHash; - +static ErrorContextCallback errorCallback = { 0 }; static volatile sig_atomic_t got_SIGHUP = false; static volatile sig_atomic_t got_SIGTERM = false; @@ -125,6 +126,8 @@ static void MaintenanceDaemonShmemExit(int code, Datum arg); static void MaintenanceDaemonErrorContext(void *arg); static bool MetadataSyncTriggeredCheckAndReset(MaintenanceDaemonDBData *dbData); static void WarnMaintenanceDaemonNotStarted(void); +static MaintenanceDaemonDBData * GetMaintenanceDaemonDBHashEntry(Oid databaseId, + bool *found); /* * InitializeMaintenanceDaemon, called at server start, is responsible for @@ -139,6 +142,82 @@ InitializeMaintenanceDaemon(void) } +/* + * GetMaintenanceDaemonDBHashEntry searches the MaintenanceDaemonDBHash for the + * databaseId. It returns the entry if found or creates a new entry and initializes + * the value with zeroes. + */ +MaintenanceDaemonDBData * +GetMaintenanceDaemonDBHashEntry(Oid databaseId, bool *found) +{ + MaintenanceDaemonDBData *dbData = (MaintenanceDaemonDBData *) hash_search( + MaintenanceDaemonDBHash, + &MyDatabaseId, + HASH_ENTER_NULL, + found); + + if (!dbData) + { + elog(LOG, + "cannot create or find the maintenance deamon hash entry for database %u", + databaseId); + return NULL; + } + + if (!*found) + { + /* ensure the values in MaintenanceDaemonDBData are zero */ + memset(((char *) dbData) + sizeof(Oid), 0, + sizeof(MaintenanceDaemonDBData) - sizeof(Oid)); + } + + return dbData; +} + + +/* + * InitializeMaintenanceDaemonForMainDb is called in _PG_Init + * at which stage we are not in a transaction or have databaseOid + */ +void +InitializeMaintenanceDaemonForMainDb(void) +{ + if (strcmp(MainDb, "") == 0) + { + elog(LOG, "There is no designated Main database."); + return; + } + + BackgroundWorker worker; + + memset(&worker, 0, sizeof(worker)); + + + strcpy_s(worker.bgw_name, sizeof(worker.bgw_name), + "Citus Maintenance Daemon for Main DB"); + + /* request ability to connect to target database */ + worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; + + /* + * No point in getting started before able to run query, but we do + * want to get started on Hot-Standby. + */ + worker.bgw_start_time = BgWorkerStart_ConsistentState; + + /* Restart after a bit after errors, but don't bog the system. */ + worker.bgw_restart_time = 5; + strcpy_s(worker.bgw_library_name, + sizeof(worker.bgw_library_name), "citus"); + strcpy_s(worker.bgw_function_name, sizeof(worker.bgw_library_name), + "CitusMaintenanceDaemonMain"); + + worker.bgw_main_arg = (Datum) 0; + + RegisterBackgroundWorker(&worker); +} + + /* * InitializeMaintenanceDaemonBackend, called at backend start and * configuration changes, is responsible for starting a per-database @@ -148,31 +227,20 @@ void InitializeMaintenanceDaemonBackend(void) { Oid extensionOwner = CitusExtensionOwner(); - bool found; + bool found = false; LWLockAcquire(&MaintenanceDaemonControl->lock, LW_EXCLUSIVE); - MaintenanceDaemonDBData *dbData = (MaintenanceDaemonDBData *) hash_search( - MaintenanceDaemonDBHash, - &MyDatabaseId, - HASH_ENTER_NULL, - &found); + MaintenanceDaemonDBData *dbData = GetMaintenanceDaemonDBHashEntry(MyDatabaseId, + &found); if (dbData == NULL) { WarnMaintenanceDaemonNotStarted(); LWLockRelease(&MaintenanceDaemonControl->lock); - return; } - if (!found) - { - /* ensure the values in MaintenanceDaemonDBData are zero */ - memset(((char *) dbData) + sizeof(Oid), 0, - sizeof(MaintenanceDaemonDBData) - sizeof(Oid)); - } - if (IsMaintenanceDaemon) { /* @@ -271,66 +339,97 @@ WarnMaintenanceDaemonNotStarted(void) /* - * CitusMaintenanceDaemonMain is the maintenance daemon's main routine, it'll - * be started by the background worker infrastructure. If it errors out, - * it'll be restarted after a few seconds. + * ConnectToDatabase connects to the database for the given databaseOid. + * if databaseOid is 0, connects to MainDb and then creates a hash entry. + * If a hash entry cannot be created for MainDb it exits the process requesting a restart. + * However for regular databases, it exits without requesting a restart since another + * subsequent backend is expected to start the Maintenance Daemon. + * If the found hash entry has a valid workerPid, it exits + * without requesting a restart since there is already a daemon running. */ -void -CitusMaintenanceDaemonMain(Datum main_arg) +static MaintenanceDaemonDBData * +ConnectToDatabase(Oid databaseOid) { - Oid databaseOid = DatumGetObjectId(main_arg); - TimestampTz nextStatsCollectionTime USED_WITH_LIBCURL_ONLY = - TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 60 * 1000); - bool retryStatsCollection USED_WITH_LIBCURL_ONLY = false; - TimestampTz lastRecoveryTime = 0; - TimestampTz lastShardCleanTime = 0; - TimestampTz lastStatStatementsPurgeTime = 0; - TimestampTz nextMetadataSyncTime = 0; + MaintenanceDaemonDBData *myDbData = NULL; - /* state kept for the background tasks queue monitor */ - TimestampTz lastBackgroundTaskQueueCheck = GetCurrentTimestamp(); - BackgroundWorkerHandle *backgroundTasksQueueBgwHandle = NULL; - bool backgroundTasksQueueWarnedForLock = false; - /* - * We do metadata sync in a separate background worker. We need its - * handle to be able to check its status. - */ - BackgroundWorkerHandle *metadataSyncBgwHandle = NULL; + bool isMainDb = false; - /* - * Look up this worker's configuration. - */ LWLockAcquire(&MaintenanceDaemonControl->lock, LW_EXCLUSIVE); - MaintenanceDaemonDBData *myDbData = (MaintenanceDaemonDBData *) - hash_search(MaintenanceDaemonDBHash, &databaseOid, - HASH_FIND, NULL); - if (!myDbData) - { - /* - * When the database crashes, background workers are restarted, but - * the state in shared memory is lost. In that case, we exit and - * wait for a session to call InitializeMaintenanceDaemonBackend - * to properly add it to the hash. - */ - proc_exit(0); + if (databaseOid == 0) + { + char *databaseName = MainDb; + + /* + * Since we cannot query databaseOid without initializing Postgres + * first, connect to the database by name. + */ + BackgroundWorkerInitializeConnection(databaseName, NULL, 0); + + /* + * Now we have a valid MyDatabaseId. + * Insert the hash entry for the database to the Maintenance Deamon Hash. + */ + bool found = false; + + myDbData = GetMaintenanceDaemonDBHashEntry(MyDatabaseId, &found); + + if (!myDbData) + { + /* + * If an entry cannot be created, + * return code of 1 requests worker restart + * Since BackgroundWorker for the MainDb is only registered + * once during server startup, we need to retry. + */ + proc_exit(1); + } + + if (found && myDbData->workerPid != 0) + { + /* Another maintenance daemon is running.*/ + + proc_exit(0); + } + + databaseOid = MyDatabaseId; + myDbData->userOid = GetSessionUserId(); + isMainDb = true; + } + else + { + myDbData = (MaintenanceDaemonDBData *) + hash_search(MaintenanceDaemonDBHash, &databaseOid, + HASH_FIND, NULL); + + if (!myDbData) + { + /* + * When the database crashes, background workers are restarted, but + * the state in shared memory is lost. In that case, we exit and + * wait for a session to call InitializeMaintenanceDaemonBackend + * to properly add it to the hash. + */ + + proc_exit(0); + } + + if (myDbData->workerPid != 0) + { + /* + * Another maintenance daemon is running. This usually happens because + * postgres restarts the daemon after an non-zero exit, and + * InitializeMaintenanceDaemonBackend started one before postgres did. + * In that case, the first one stays and the last one exits. + */ + + proc_exit(0); + } } - if (myDbData->workerPid != 0) - { - /* - * Another maintenance daemon is running. This usually happens because - * postgres restarts the daemon after an non-zero exit, and - * InitializeMaintenanceDaemonBackend started one before postgres did. - * In that case, the first one stays and the last one exits. - */ - - proc_exit(0); - } - - before_shmem_exit(MaintenanceDaemonShmemExit, main_arg); + before_shmem_exit(MaintenanceDaemonShmemExit, ObjectIdGetDatum(databaseOid)); /* * Signal that I am the maintenance daemon now. @@ -356,25 +455,55 @@ CitusMaintenanceDaemonMain(Datum main_arg) LWLockRelease(&MaintenanceDaemonControl->lock); - /* - * Setup error context so log messages can be properly attributed. Some of - * them otherwise sound like they might be from a normal user connection. - * Do so before setting up signals etc, so we never exit without the - * context setup. - */ - ErrorContextCallback errorCallback = { 0 }; memset(&errorCallback, 0, sizeof(errorCallback)); errorCallback.callback = MaintenanceDaemonErrorContext; errorCallback.arg = (void *) myDbData; errorCallback.previous = error_context_stack; error_context_stack = &errorCallback; - elog(LOG, "starting maintenance daemon on database %u user %u", databaseOid, myDbData->userOid); - /* connect to database, after that we can actually access catalogs */ - BackgroundWorkerInitializeConnectionByOid(databaseOid, myDbData->userOid, 0); + if (!isMainDb) + { + /* connect to database, after that we can actually access catalogs */ + BackgroundWorkerInitializeConnectionByOid(databaseOid, myDbData->userOid, 0); + } + + return myDbData; +} + + +/* + * CitusMaintenanceDaemonMain is the maintenance daemon's main routine, it'll + * be started by the background worker infrastructure. If it errors out, + * it'll be restarted after a few seconds. + */ +void +CitusMaintenanceDaemonMain(Datum main_arg) +{ + Oid databaseOid = DatumGetObjectId(main_arg); + TimestampTz nextStatsCollectionTime USED_WITH_LIBCURL_ONLY = + TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 60 * 1000); + bool retryStatsCollection USED_WITH_LIBCURL_ONLY = false; + TimestampTz lastRecoveryTime = 0; + TimestampTz lastShardCleanTime = 0; + TimestampTz lastStatStatementsPurgeTime = 0; + TimestampTz nextMetadataSyncTime = 0; + + /* state kept for the background tasks queue monitor */ + TimestampTz lastBackgroundTaskQueueCheck = GetCurrentTimestamp(); + BackgroundWorkerHandle *backgroundTasksQueueBgwHandle = NULL; + bool backgroundTasksQueueWarnedForLock = false; + + + /* + * We do metadata sync in a separate background worker. We need its + * handle to be able to check its status. + */ + BackgroundWorkerHandle *metadataSyncBgwHandle = NULL; + + MaintenanceDaemonDBData *myDbData = ConnectToDatabase(databaseOid); /* make worker recognizable in pg_stat_activity */ pgstat_report_appname("Citus Maintenance Daemon"); @@ -383,7 +512,7 @@ CitusMaintenanceDaemonMain(Datum main_arg) * Terminate orphaned metadata sync daemons spawned from previously terminated * or crashed maintenanced instances. */ - SignalMetadataSyncDaemon(databaseOid, SIGTERM); + SignalMetadataSyncDaemon(MyDatabaseId, SIGTERM); /* enter main loop */ while (!got_SIGTERM) @@ -945,7 +1074,7 @@ MaintenanceDaemonShmemExit(int code, Datum arg) } -/* MaintenanceDaemonSigTermHandler calls proc_exit(0) */ +/* MaintenanceDaemonSigTermHandler sets the got_SIGTERM flag.*/ static void MaintenanceDaemonSigTermHandler(SIGNAL_ARGS) { diff --git a/src/include/distributed/maintenanced.h b/src/include/distributed/maintenanced.h index de1e68883..07387a7fd 100644 --- a/src/include/distributed/maintenanced.h +++ b/src/include/distributed/maintenanced.h @@ -20,6 +20,7 @@ /* config variable for */ extern double DistributedDeadlockDetectionTimeoutFactor; +extern char *MainDb; extern void StopMaintenanceDaemon(Oid databaseId); extern void TriggerNodeMetadataSync(Oid databaseId); @@ -27,6 +28,7 @@ extern void InitializeMaintenanceDaemon(void); extern size_t MaintenanceDaemonShmemSize(void); extern void MaintenanceDaemonShmemInit(void); extern void InitializeMaintenanceDaemonBackend(void); +extern void InitializeMaintenanceDaemonForMainDb(void); extern bool LockCitusExtension(void); extern PGDLLEXPORT void CitusMaintenanceDaemonMain(Datum main_arg); diff --git a/src/test/regress/citus_tests/common.py b/src/test/regress/citus_tests/common.py index 907102482..53c9c7944 100644 --- a/src/test/regress/citus_tests/common.py +++ b/src/test/regress/citus_tests/common.py @@ -453,6 +453,9 @@ def cleanup_test_leftovers(nodes): for node in nodes: node.cleanup_schemas() + for node in nodes: + node.cleanup_databases() + for node in nodes: node.cleanup_users() @@ -753,6 +756,7 @@ class Postgres(QueryRunner): self.subscriptions = set() self.publications = set() self.replication_slots = set() + self.databases = set() self.schemas = set() self.users = set() @@ -993,6 +997,10 @@ class Postgres(QueryRunner): args = sql.SQL("") self.sql(sql.SQL("CREATE USER {} {}").format(sql.Identifier(name), args)) + def create_database(self, name): + self.databases.add(name) + self.sql(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(name))) + def create_schema(self, name): self.schemas.add(name) self.sql(sql.SQL("CREATE SCHEMA {}").format(sql.Identifier(name))) @@ -1020,6 +1028,12 @@ class Postgres(QueryRunner): for user in self.users: self.sql(sql.SQL("DROP USER IF EXISTS {}").format(sql.Identifier(user))) + def cleanup_databases(self): + for database in self.databases: + self.sql( + sql.SQL("DROP DATABASE IF EXISTS {}").format(sql.Identifier(database)) + ) + def cleanup_schemas(self): for schema in self.schemas: self.sql( diff --git a/src/test/regress/citus_tests/test/test_maintenancedeamon.py b/src/test/regress/citus_tests/test/test_maintenancedeamon.py new file mode 100644 index 000000000..3f6cb501e --- /dev/null +++ b/src/test/regress/citus_tests/test/test_maintenancedeamon.py @@ -0,0 +1,74 @@ +# This test checks that once citus.main_db is set and the +# server is restarted. A Citus Maintenance Daemon for the main_db +# is launched. This should happen even if there is no query run +# in main_db yet. +import time + + +def wait_until_maintenance_deamons_start(deamoncount, cluster): + i = 0 + n = 0 + + while i < 10: + i += 1 + n = cluster.coordinator.sql_value( + "SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';" + ) + + if n == deamoncount: + break + + time.sleep(0.1) + + assert n == deamoncount + + +def test_set_maindb(cluster_factory): + cluster = cluster_factory(0) + + # Test that once citus.main_db is set to a database name + # there are two maintenance deamons running upon restart. + # One maintenance deamon for the database of the current connection + # and one for the citus.main_db. + cluster.coordinator.create_database("mymaindb") + cluster.coordinator.configure("citus.main_db='mymaindb'") + cluster.coordinator.restart() + + assert cluster.coordinator.sql_value("SHOW citus.main_db;") == "mymaindb" + + wait_until_maintenance_deamons_start(2, cluster) + + assert ( + cluster.coordinator.sql_value( + "SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon' AND datname='mymaindb';" + ) + == 1 + ) + + # Test that once citus.main_db is set to empty string + # there is only one maintenance deamon for the database + # of the current connection. + cluster.coordinator.configure("citus.main_db=''") + cluster.coordinator.restart() + assert cluster.coordinator.sql_value("SHOW citus.main_db;") == "" + + wait_until_maintenance_deamons_start(1, cluster) + + # Test that after citus.main_db is dropped. The maintenance + # deamon for this database is terminated. + cluster.coordinator.configure("citus.main_db='mymaindb'") + cluster.coordinator.restart() + assert cluster.coordinator.sql_value("SHOW citus.main_db;") == "mymaindb" + + wait_until_maintenance_deamons_start(2, cluster) + + cluster.coordinator.sql("DROP DATABASE mymaindb;") + + wait_until_maintenance_deamons_start(1, cluster) + + assert ( + cluster.coordinator.sql_value( + "SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon' AND datname='mymaindb';" + ) + == 0 + ) diff --git a/src/test/regress/expected/validate_constraint.out b/src/test/regress/expected/validate_constraint.out index 08b03a2bf..b38e835fd 100644 --- a/src/test/regress/expected/validate_constraint.out +++ b/src/test/regress/expected/validate_constraint.out @@ -133,12 +133,6 @@ ORDER BY 1, 2; validatable_constraint_8000016 | t (10 rows) -DROP TABLE constrained_table; -DROP TABLE referenced_table CASCADE; -DROP TABLE referencing_table; +SET client_min_messages TO WARNING; DROP SCHEMA validate_constraint CASCADE; -NOTICE: drop cascades to 3 other objects -DETAIL: drop cascades to type constraint_validity -drop cascades to view constraint_validations_in_workers -drop cascades to view constraint_validations SET search_path TO DEFAULT; diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index ad70f136e..e824f5cba 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -201,7 +201,8 @@ test: citus_copy_shard_placement # multi_utilities cannot be run in parallel with other tests because it checks # global locks test: multi_utilities -test: foreign_key_to_reference_table validate_constraint +test: foreign_key_to_reference_table +test: validate_constraint test: multi_repartition_udt multi_repartitioned_subquery_udf multi_subtransactions test: multi_modifying_xacts diff --git a/src/test/regress/multi_schedule_hyperscale b/src/test/regress/multi_schedule_hyperscale index 8849e81f2..86ac16d4f 100644 --- a/src/test/regress/multi_schedule_hyperscale +++ b/src/test/regress/multi_schedule_hyperscale @@ -154,7 +154,8 @@ test: multi_outer_join # --- test: multi_complex_count_distinct test: multi_upsert multi_simple_queries -test: foreign_key_to_reference_table validate_constraint +test: foreign_key_to_reference_table +test: validate_constraint # --------- # creates hash and range-partitioned tables and performs COPY diff --git a/src/test/regress/multi_schedule_hyperscale_superuser b/src/test/regress/multi_schedule_hyperscale_superuser index 052b93786..f5cddfc05 100644 --- a/src/test/regress/multi_schedule_hyperscale_superuser +++ b/src/test/regress/multi_schedule_hyperscale_superuser @@ -150,7 +150,9 @@ test: multi_outer_join test: multi_create_fdw test: multi_generate_ddl_commands multi_create_shards multi_prune_shard_list test: multi_upsert multi_simple_queries multi_data_types -test: multi_utilities foreign_key_to_reference_table validate_constraint +test: multi_utilities +test: foreign_key_to_reference_table +test: validate_constraint test: multi_repartition_udt multi_repartitioned_subquery_udf # --------- diff --git a/src/test/regress/sql/validate_constraint.sql b/src/test/regress/sql/validate_constraint.sql index 294e9a8b2..bb63f2854 100644 --- a/src/test/regress/sql/validate_constraint.sql +++ b/src/test/regress/sql/validate_constraint.sql @@ -116,9 +116,6 @@ SELECT * FROM constraint_validations_in_workers ORDER BY 1, 2; -DROP TABLE constrained_table; -DROP TABLE referenced_table CASCADE; -DROP TABLE referencing_table; - +SET client_min_messages TO WARNING; DROP SCHEMA validate_constraint CASCADE; SET search_path TO DEFAULT;