Merge remote-tracking branch 'upstream/main' into gokhangulbiz/tenant-stats-perf-improvements

pull/6868/head
Gokhan Gulbiz 2023-04-26 11:33:28 +03:00
commit 779af6a7b4
No known key found for this signature in database
GPG Key ID: 608EF06B6BD1B45B
26 changed files with 1861 additions and 137 deletions

View File

@ -6,7 +6,7 @@ orbs:
parameters: parameters:
image_suffix: image_suffix:
type: string type: string
default: '-v3417e8d' default: '-vabeb997'
pg13_version: pg13_version:
type: string type: string
default: '13.10' default: '13.10'
@ -421,6 +421,63 @@ jobs:
- coverage: - coverage:
flags: 'test_<< parameters.pg_major >>,upgrade' flags: 'test_<< parameters.pg_major >>,upgrade'
test-query-generator:
description: Expects the generated queries run on distributed and local tables to produce the same results
parameters:
pg_major:
description: 'postgres major version'
type: integer
image:
description: 'docker image to use for the tests'
type: string
default: citus/failtester
image_tag:
description: 'docker image tag to use'
type: string
docker:
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
working_directory: /home/circleci/project
steps:
- checkout
- attach_workspace:
at: .
- install_extension:
pg_major: << parameters.pg_major >>
- configure
- enable_core
- run:
name: 'Run Test'
command: |
gosu circleci make -C src/test/regress check-query-generator
no_output_timeout: 5m
- run:
name: 'Show regressions'
command: |
find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" -exec cat {} +
lines=$(find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" | wc -l)
if [ $lines -ne 0 ]; then
exit 1
fi
when: on_fail
- run:
name: 'Copy logfiles'
command: |
mkdir src/test/regress/tmp_citus_test/logfiles
find src/test/regress/tmp_citus_test/ -name "logfile_*" -exec cp -t src/test/regress/tmp_citus_test/logfiles/ {} +
when: on_fail
- store_artifacts:
name: 'Save logfiles'
path: src/test/regress/tmp_citus_test/logfiles
- store_artifacts:
name: 'Save ddls'
path: src/test/regress/citus_tests/query_generator/out/ddls.sql
- store_artifacts:
name: 'Save dmls'
path: src/test/regress/citus_tests/query_generator/out/queries.sql
- stack_trace
- coverage:
flags: 'test_<< parameters.pg_major >>,querygen'
test-citus: test-citus:
description: Runs the common tests of citus description: Runs the common tests of citus
parameters: parameters:
@ -935,6 +992,24 @@ workflows:
image_tag: '<< pipeline.parameters.pg15_version >>' image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15] requires: [build-15]
- test-query-generator:
name: 'test-13_check-query-generator'
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
requires: [build-13]
- test-query-generator:
name: 'test-14_check-query-generator'
pg_major: 14
image_tag: '<< pipeline.parameters.pg14_version >>'
requires: [build-14]
- test-query-generator:
name: 'test-15_check-query-generator'
pg_major: 15
image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15]
- test-pg-upgrade: - test-pg-upgrade:
name: 'test-13-14_check-pg-upgrade' name: 'test-13-14_check-pg-upgrade'
old_pg_major: 13 old_pg_major: 13
@ -975,6 +1050,7 @@ workflows:
- test-13_check-enterprise-failure - test-13_check-enterprise-failure
- test-13_check-split - test-13_check-split
- test-13_check-arbitrary-configs - test-13_check-arbitrary-configs
- test-13_check-query-generator
- test-14_check-multi - test-14_check-multi
- test-14_check-multi-1 - test-14_check-multi-1
- test-14_check-mx - test-14_check-mx
@ -993,6 +1069,7 @@ workflows:
- test-14_check-enterprise-failure - test-14_check-enterprise-failure
- test-14_check-split - test-14_check-split
- test-14_check-arbitrary-configs - test-14_check-arbitrary-configs
- test-14_check-query-generator
- test-15_check-multi - test-15_check-multi
- test-15_check-multi-1 - test-15_check-multi-1
- test-15_check-mx - test-15_check-mx
@ -1011,6 +1088,7 @@ workflows:
- test-15_check-enterprise-failure - test-15_check-enterprise-failure
- test-15_check-split - test-15_check-split
- test-15_check-arbitrary-configs - test-15_check-arbitrary-configs
- test-15_check-query-generator
- test-13-14_check-pg-upgrade - test-13-14_check-pg-upgrade
- test-14-15_check-pg-upgrade - test-14-15_check-pg-upgrade
- test-13_check-citus-upgrade - test-13_check-citus-upgrade

View File

@ -1,3 +1,129 @@
### citus v11.2.1 (April 20, 2023) ###
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug in shard copy operations (#6721)
* Fixes a bug with `INSERT .. SELECT` queries with identity columns (#6802)
* Fixes an uninitialized memory access in shard split API (#6845)
* Fixes compilation for PG13.10 and PG14.7 (#6711)
* Fixes memory leak in `alter_distributed_table` (#6726)
* Fixes memory leak issue with query results that returns single row (#6724)
* Prevents using `alter_distributed_table` and `undistribute_table` UDFs when a
table has identity columns (#6738)
* Prevents using identity columns on data types other than `bigint` on
distributed tables (#6738)
### citus v11.1.6 (April 20, 2023) ###
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug in shard copy operations (#6721)
* Fixes a bug that breaks pg upgrades if the user has a columnar table (#6624)
* Fixes a bug that causes background rebalancer to fail when a reference table
doesn't have a primary key (#6682)
* Fixes a regression in allowed foreign keys on distributed tables (#6550)
* Fixes a use-after-free bug in connection management (#6685)
* Fixes an unexpected foreign table error by disallowing to drop the
`table_name` option (#6669)
* Fixes an uninitialized memory access in shard split API (#6845)
* Fixes compilation for PG13.10 and PG14.7 (#6711)
* Fixes crash that happens when trying to replicate a reference table that is
actually dropped (#6595)
* Fixes memory leak issue with query results that returns single row (#6724)
* Fixes the modifiers for subscription and role creation (#6603)
* Makes sure to quote all identifiers used for logical replication to prevent
potential issues (#6604)
* Makes sure to skip foreign key validations at the end of shard moves (#6640)
### citus v11.0.8 (April 20, 2023) ###
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug that breaks pg upgrades if the user has a columnar table (#6624)
* Fixes an unexpected foreign table error by disallowing to drop the
`table_name` option (#6669)
* Fixes compilation warning on PG13 + OpenSSL 3.0 (#6038, #6502)
* Fixes crash that happens when trying to replicate a reference table that is
actually dropped (#6595)
* Fixes memory leak issue with query results that returns single row (#6724)
* Fixes the modifiers for subscription and role creation (#6603)
* Fixes two potential dangling pointer issues (#6504, #6507)
* Makes sure to quote all identifiers used for logical replication to prevent
potential issues (#6604)
### citus v10.2.9 (April 20, 2023) ###
* Correctly reports shard size in `citus_shards` view (#6748)
* Fixes a bug in `ALTER EXTENSION citus UPDATE` (#6383)
* Fixes a bug that breaks pg upgrades if the user has a columnar table (#6624)
* Fixes a bug that prevents retaining columnar table options after a
table-rewrite (#6337)
* Fixes memory leak issue with query results that returns single row (#6724)
* Raises memory limits in columnar from 256MB to 1GB for reads and writes
(#6419)
### citus v10.1.5 (April 20, 2023) ###
* Fixes a crash that occurs when the aggregate that cannot be pushed-down
returns empty result from a worker (#5679)
* Fixes columnar freezing/wraparound bug (#5962)
* Fixes memory leak issue with query results that returns single row (#6724)
* Prevents alter table functions from dropping extensions (#5974)
### citus v10.0.7 (April 20, 2023) ###
* Fixes a crash that occurs when the aggregate that cannot be pushed-down
returns empty result from a worker (#5679)
* Fixes columnar freezing/wraparound bug (#5962)
* Fixes memory leak issue with query results that returns single row (#6724)
* Prevents alter table functions from dropping extensions (#5974)
### citus v9.5.11 (April 20, 2023) ###
* Fixes a crash that occurs when the aggregate that cannot be pushed-down
returns empty result from a worker (#5679)
* Fixes memory leak issue with query results that returns single row (#6724)
* Prevents alter table functions from dropping extensions (#5974)
### citus v11.2.0 (January 30, 2023) ### ### citus v11.2.0 (January 30, 2023) ###
* Adds support for outer joins with reference tables / complex subquery-CTEs * Adds support for outer joins with reference tables / complex subquery-CTEs

View File

@ -34,6 +34,7 @@ MULTI_REGRESS_OPTS = --inputdir=$(citus_abs_srcdir) $(pg_regress_locale_flags) -
pg_upgrade_check = $(citus_abs_srcdir)/citus_tests/upgrade/pg_upgrade_test.py pg_upgrade_check = $(citus_abs_srcdir)/citus_tests/upgrade/pg_upgrade_test.py
citus_upgrade_check =CITUS_OLD_VERSION=$(citus-old-version) $(citus_abs_srcdir)/citus_tests/upgrade/citus_upgrade_test.py citus_upgrade_check =CITUS_OLD_VERSION=$(citus-old-version) $(citus_abs_srcdir)/citus_tests/upgrade/citus_upgrade_test.py
arbitrary_config_check = $(citus_abs_srcdir)/citus_tests/arbitrary_configs/citus_arbitrary_configs.py arbitrary_config_check = $(citus_abs_srcdir)/citus_tests/arbitrary_configs/citus_arbitrary_configs.py
query_generator_check = $(citus_abs_srcdir)/citus_tests/query_generator/bin/run_query_compare_test.py
template_isolation_files = $(shell find $(citus_abs_srcdir)/spec/ -name '*.spec') template_isolation_files = $(shell find $(citus_abs_srcdir)/spec/ -name '*.spec')
generated_isolation_files = $(patsubst $(citus_abs_srcdir)/spec/%,$(citus_abs_srcdir)/build/specs/%,$(template_isolation_files)) generated_isolation_files = $(patsubst $(citus_abs_srcdir)/spec/%,$(citus_abs_srcdir)/build/specs/%,$(template_isolation_files))
@ -44,7 +45,7 @@ vanilla_diffs_file = $(citus_abs_srcdir)/pg_vanilla_outputs/$(MAJORVERSION)/regr
# intermediate, for muscle memory backward compatibility. # intermediate, for muscle memory backward compatibility.
check: check-full check-enterprise-full check: check-full check-enterprise-full
# check-full triggers all tests that ought to be run routinely # check-full triggers all tests that ought to be run routinely
check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest check-query-generator
# check-enterprise-full triggers all enterprise specific tests # check-enterprise-full triggers all enterprise specific tests
check-enterprise-full: check-enterprise check-enterprise-isolation check-enterprise-failure check-enterprise-isolation-logicalrep-1 check-enterprise-isolation-logicalrep-2 check-enterprise-isolation-logicalrep-3 check-enterprise-full: check-enterprise check-enterprise-isolation check-enterprise-failure check-enterprise-isolation-logicalrep-1 check-enterprise-isolation-logicalrep-2 check-enterprise-isolation-logicalrep-3
@ -262,6 +263,9 @@ check-arbitrary-base: all
check-pytest: check-pytest:
pytest -n auto pytest -n auto
check-query-generator: all
${query_generator_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir)
check-citus-upgrade: all check-citus-upgrade: all
$(citus_upgrade_check) \ $(citus_upgrade_check) \
--bindir=$(bindir) \ --bindir=$(bindir) \

View File

@ -15,6 +15,7 @@ pytest-asyncio = "*"
pytest-timeout = "*" pytest-timeout = "*"
pytest-xdist = "*" pytest-xdist = "*"
pytest-repeat = "*" pytest-repeat = "*"
pyyaml = "*"
[dev-packages] [dev-packages]
black = "*" black = "*"

View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "eb9ca3a7b05e76c7ac60179a1755f89600dfb215e02bf08c258d548df1d96025" "sha256": "9568b1f3e4d4fd408e5e263f6346b0a4f479ac88e02f64bb79a9d482096e6a03"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@ -24,14 +24,6 @@
"markers": "python_version >= '3.6'", "markers": "python_version >= '3.6'",
"version": "==3.4.1" "version": "==3.4.1"
}, },
"attrs": {
"hashes": [
"sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836",
"sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"
],
"markers": "python_version >= '3.6'",
"version": "==22.2.0"
},
"blinker": { "blinker": {
"hashes": [ "hashes": [
"sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6" "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6"
@ -219,32 +211,28 @@
}, },
"cryptography": { "cryptography": {
"hashes": [ "hashes": [
"sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1", "sha256:05dc219433b14046c476f6f09d7636b92a1c3e5808b9a6536adf4932b3b2c440",
"sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7", "sha256:0dcca15d3a19a66e63662dc8d30f8036b07be851a8680eda92d079868f106288",
"sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06", "sha256:142bae539ef28a1c76794cca7f49729e7c54423f615cfd9b0b1fa90ebe53244b",
"sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84", "sha256:3daf9b114213f8ba460b829a02896789751626a2a4e7a43a28ee77c04b5e4958",
"sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915", "sha256:48f388d0d153350f378c7f7b41497a54ff1513c816bcbbcafe5b829e59b9ce5b",
"sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074", "sha256:4df2af28d7bedc84fe45bd49bc35d710aede676e2a4cb7fc6d103a2adc8afe4d",
"sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5", "sha256:4f01c9863da784558165f5d4d916093737a75203a5c5286fde60e503e4276c7a",
"sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3", "sha256:7a38250f433cd41df7fcb763caa3ee9362777fdb4dc642b9a349721d2bf47404",
"sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9", "sha256:8f79b5ff5ad9d3218afb1e7e20ea74da5f76943ee5edb7f76e56ec5161ec782b",
"sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3", "sha256:956ba8701b4ffe91ba59665ed170a2ebbdc6fc0e40de5f6059195d9f2b33ca0e",
"sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011", "sha256:a04386fb7bc85fab9cd51b6308633a3c271e3d0d3eae917eebab2fac6219b6d2",
"sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536", "sha256:a95f4802d49faa6a674242e25bfeea6fc2acd915b5e5e29ac90a32b1139cae1c",
"sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a", "sha256:adc0d980fd2760c9e5de537c28935cc32b9353baaf28e0814df417619c6c8c3b",
"sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f", "sha256:aecbb1592b0188e030cb01f82d12556cf72e218280f621deed7d806afd2113f9",
"sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480", "sha256:b12794f01d4cacfbd3177b9042198f3af1c856eedd0a98f10f141385c809a14b",
"sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac", "sha256:c0764e72b36a3dc065c155e5b22f93df465da9c39af65516fe04ed3c68c92636",
"sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0", "sha256:c33c0d32b8594fa647d2e01dbccc303478e16fdd7cf98652d5b3ed11aa5e5c99",
"sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108", "sha256:cbaba590180cba88cb99a5f76f90808a624f18b169b90a4abb40c1fd8c19420e",
"sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828", "sha256:d5a1bd0e9e2031465761dfa920c16b0065ad77321d8a8c1f5ee331021fda65e9"
"sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354",
"sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612",
"sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3",
"sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97"
], ],
"index": "pypi", "index": "pypi",
"version": "==39.0.2" "version": "==40.0.2"
}, },
"docopt": { "docopt": {
"hashes": [ "hashes": [
@ -271,11 +259,11 @@
}, },
"filelock": { "filelock": {
"hashes": [ "hashes": [
"sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce", "sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9",
"sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182" "sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.10.0" "version": "==3.12.0"
}, },
"flask": { "flask": {
"hashes": [ "hashes": [
@ -488,11 +476,11 @@
}, },
"packaging": { "packaging": {
"hashes": [ "hashes": [
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
"sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
], ],
"markers": "python_version >= '3.7'", "markers": "python_version >= '3.7'",
"version": "==23.0" "version": "==23.1"
}, },
"passlib": { "passlib": {
"hashes": [ "hashes": [
@ -553,21 +541,11 @@
}, },
"pyasn1": { "pyasn1": {
"hashes": [ "hashes": [
"sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359", "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57",
"sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576", "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"
"sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf",
"sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7",
"sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d",
"sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00",
"sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8",
"sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86",
"sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12",
"sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776",
"sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba",
"sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2",
"sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"
], ],
"version": "==0.4.8" "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==0.5.0"
}, },
"pycparser": { "pycparser": {
"hashes": [ "hashes": [
@ -578,11 +556,11 @@
}, },
"pyopenssl": { "pyopenssl": {
"hashes": [ "hashes": [
"sha256:c1cc5f86bcacefc84dada7d31175cae1b1518d5f60d3d0bb595a67822a868a6f", "sha256:841498b9bec61623b1b6c47ebbc02367c07d60e0e195f19790817f10cc8db0b7",
"sha256:df5fc28af899e74e19fccb5510df423581047e10ab6f1f4ba1763ff5fde844c0" "sha256:9e0c526404a210df9d2b18cd33364beadb0dc858a739b885677bc65e105d4a4c"
], ],
"markers": "python_version >= '3.6'", "markers": "python_version >= '3.6'",
"version": "==23.0.0" "version": "==23.1.1"
}, },
"pyparsing": { "pyparsing": {
"hashes": [ "hashes": [
@ -600,19 +578,19 @@
}, },
"pytest": { "pytest": {
"hashes": [ "hashes": [
"sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e", "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362",
"sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4" "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"
], ],
"index": "pypi", "index": "pypi",
"version": "==7.2.2" "version": "==7.3.1"
}, },
"pytest-asyncio": { "pytest-asyncio": {
"hashes": [ "hashes": [
"sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36", "sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b",
"sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442" "sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c"
], ],
"index": "pypi", "index": "pypi",
"version": "==0.20.3" "version": "==0.21.0"
}, },
"pytest-repeat": { "pytest-repeat": {
"hashes": [ "hashes": [
@ -638,6 +616,52 @@
"index": "pypi", "index": "pypi",
"version": "==3.2.1" "version": "==3.2.1"
}, },
"pyyaml": {
"hashes": [
"sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
"sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
"sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
"sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b",
"sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4",
"sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07",
"sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba",
"sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9",
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
"sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
"sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
"sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
"sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
"sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
"sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
"sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
"sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
"sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
"sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
"sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
"sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
"sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
"sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
"sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
"sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
"sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
"sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
"sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
"sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
"sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
"sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
"sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
"sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
"sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
"sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
"sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
],
"index": "pypi",
"version": "==6.0"
},
"ruamel.yaml": { "ruamel.yaml": {
"hashes": [ "hashes": [
"sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33", "sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33",
@ -705,20 +729,20 @@
}, },
"tornado": { "tornado": {
"hashes": [ "hashes": [
"sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", "sha256:4546003dc8b5733489139d3bff5fa6a0211be505faf819bd9970e7c2b32e8122",
"sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72", "sha256:4d349846931557b7ec92f224b5d598b160e2ba26ae1812480b42e9622c884bf7",
"sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23", "sha256:6164571f5b9f73143d1334df4584cb9ac86d20c461e17b6c189a19ead8bb93c1",
"sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8", "sha256:6cfff1e9c15c79e106b8352269d201f8fc0815914a6260f3893ca18b724ea94b",
"sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b", "sha256:720f53e6367b38190ae7fa398c25c086c69d88b3c6535bd6021a126b727fb5cd",
"sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9", "sha256:912df5712024564e362ecce43c8d5862e14c78c8dd3846c9d889d44fbd7f4951",
"sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13", "sha256:c37b6a384d54ce6a31168d40ab21ad2591ddaf34973075cc0cad154402ecd9e8",
"sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75", "sha256:c659ab04d5aa477dbe44152c67d93f3ad3243b992d94f795ca1d5c73c37337ce",
"sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac", "sha256:c9114a61a4588c09065b9996ae05462350d17160b92b9bf9a1e93689cc0424dc",
"sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e", "sha256:d68f3192936ff2c4add04dc21a436a43b4408d466746b78bb2b9d0a53a18683f",
"sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b" "sha256:d7b737e18f701de3e4a3b0824260b4d740e4d60607b8089bb80e80ffd464780e"
], ],
"markers": "python_version >= '3.7'", "markers": "python_version >= '3.8'",
"version": "==6.2" "version": "==6.3"
}, },
"typing-extensions": { "typing-extensions": {
"hashes": [ "hashes": [
@ -808,42 +832,42 @@
"develop": { "develop": {
"attrs": { "attrs": {
"hashes": [ "hashes": [
"sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04",
"sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"
], ],
"markers": "python_version >= '3.6'", "markers": "python_version >= '3.7'",
"version": "==22.2.0" "version": "==23.1.0"
}, },
"black": { "black": {
"hashes": [ "hashes": [
"sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd", "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5",
"sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555", "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915",
"sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481", "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326",
"sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468", "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940",
"sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9", "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b",
"sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a", "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30",
"sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958", "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c",
"sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580", "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c",
"sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26", "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab",
"sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32", "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27",
"sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8", "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2",
"sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753", "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961",
"sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b", "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9",
"sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074", "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb",
"sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651", "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70",
"sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24", "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331",
"sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6", "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2",
"sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad", "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266",
"sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac", "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d",
"sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221", "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6",
"sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06", "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b",
"sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27", "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925",
"sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648", "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8",
"sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739", "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4",
"sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104" "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"
], ],
"index": "pypi", "index": "pypi",
"version": "==23.1.0" "version": "==23.3.0"
}, },
"click": { "click": {
"hashes": [ "hashes": [
@ -863,11 +887,11 @@
}, },
"flake8-bugbear": { "flake8-bugbear": {
"hashes": [ "hashes": [
"sha256:beb5c7efcd7ccc2039ef66a77bb8db925e7be3531ff1cb4d0b7030d0e2113d72", "sha256:8a218d13abd6904800970381057ce8e72cde8eea743240c4ef3ede4dd0bc9cfb",
"sha256:e3e7f74c8a49ad3794a7183353026dabd68c74030d5f46571f84c1fb0eb79363" "sha256:ea565bdb87b96b56dc499edd6cc3ba7f695373d902a5f56c989b74fad7c7719d"
], ],
"index": "pypi", "index": "pypi",
"version": "==23.3.12" "version": "==23.3.23"
}, },
"isort": { "isort": {
"hashes": [ "hashes": [
@ -895,11 +919,11 @@
}, },
"packaging": { "packaging": {
"hashes": [ "hashes": [
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
"sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
], ],
"markers": "python_version >= '3.7'", "markers": "python_version >= '3.7'",
"version": "==23.0" "version": "==23.1"
}, },
"pathspec": { "pathspec": {
"hashes": [ "hashes": [
@ -911,11 +935,11 @@
}, },
"platformdirs": { "platformdirs": {
"hashes": [ "hashes": [
"sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa", "sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08",
"sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8" "sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e"
], ],
"markers": "python_version >= '3.7'", "markers": "python_version >= '3.7'",
"version": "==3.1.1" "version": "==3.2.0"
}, },
"pycodestyle": { "pycodestyle": {
"hashes": [ "hashes": [

View File

@ -1,4 +1,10 @@
test: upgrade_basic_after upgrade_type_after upgrade_ref2ref_after upgrade_distributed_function_after upgrade_rebalance_strategy_after upgrade_list_citus_objects upgrade_autoconverted_after upgrade_citus_stat_activity upgrade_citus_locks upgrade_distributed_triggers_after test: upgrade_basic_after upgrade_ref2ref_after upgrade_type_after upgrade_distributed_function_after upgrade_rebalance_strategy_after upgrade_list_citus_objects upgrade_autoconverted_after upgrade_citus_stat_activity upgrade_citus_locks
# This test cannot be run with run_test.py currently due to its dependence on
# the specific PG versions that we use to run upgrade tests. For now we leave
# it out of the parallel line, so that flaky test detection can at least work
# for the other tests.
test: upgrade_distributed_triggers_after
# This attempts dropping citus extension (and rollbacks), so please do # This attempts dropping citus extension (and rollbacks), so please do
# not run in parallel with any other tests. # not run in parallel with any other tests.

View File

@ -15,6 +15,8 @@ WORKER2 = "worker2"
REGULAR_USER_NAME = "regularuser" REGULAR_USER_NAME = "regularuser"
SUPER_USER_NAME = "postgres" SUPER_USER_NAME = "postgres"
DATABASE_NAME = "postgres"
ARBITRARY_SCHEDULE_NAMES = [ ARBITRARY_SCHEDULE_NAMES = [
"create_schedule", "create_schedule",
"sql_schedule", "sql_schedule",
@ -96,6 +98,7 @@ class CitusBaseClusterConfig(object, metaclass=NewInitCaller):
self.temp_dir = CITUS_ARBITRARY_TEST_DIR self.temp_dir = CITUS_ARBITRARY_TEST_DIR
self.worker_amount = 2 self.worker_amount = 2
self.user = REGULAR_USER_NAME self.user = REGULAR_USER_NAME
self.dbname = DATABASE_NAME
self.is_mx = True self.is_mx = True
self.is_citus = True self.is_citus = True
self.name = type(self).__name__ self.name = type(self).__name__

View File

@ -0,0 +1,2 @@
__pycache__
*-env

View File

@ -0,0 +1,223 @@
## Goal of the Tool
The tool supports a simple syntax for generating queries with different join orders. The main motivation for creating the tool was to compare the results of the generated queries between [Citus](https://github.com/citusdata/citus) tables and plain Postgres tables. That is why only a basic syntax is supported for now; it can be extended to support other kinds of queries.
## Query Generator for Postgres
The tool generates SELECT queries with different join orders, and the query depth can be configured. It also generates the DDLs required to run the queries.
You can also tweak the configuration parameters that control how data insertion commands are generated.
## How to Run Citus Join Verification?
You can verify whether Citus breaks any default PG join behaviour via `citus_compare_dist_local_joins.sh`. The script creates
the tables specified in the config, runs the generated queries on those tables, and saves the results into `out/dist_queries.out`.
After running the queries against the Citus tables, it undistributes the tables (leaving plain PG tables with the same names), executes the same
queries again, and saves the results into `out/local_queries.out`. As a final step, it generates a diff between the local and distributed results.
You can check the contents of `out/local_dist.diffs` to see whether any query is unsupported by Citus.
1. Create a Citus local cluster with 2 workers by using [citus_dev](https://github.com/citusdata/tools/tree/develop/citus_dev) tool
(Note: make sure you do not configure psql via a `.psqlrc` file, as that would make the test fail.)
```bash
citus_dev make testCluster --destroy
```
2. Run the test,
```bash
cd src/test/regress/citus_tests/query_generator/bin
bash citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port>
```
3. See the diff content in `src/test/regress/citus_tests/query_generator/out/local_dist.diffs`
### Configuration
You can configure 3 different parts:
- [DDL Configuration](#ddl-configuration)
- [Data Insertion Configuration](#data-insertion-configuration)
- [Query Configuration](#query-configuration)
## DDL Configuration
The tool generates the related DDL commands before generating queries; a sketch of the generated DDL is shown after the explanation below.
Schema for DDL configuration:
```yaml
ddlOutFile: <string>
commonColName: <string>
targetTables: <Table[]>
- Table:
name: <string>
citusType: <CitusType>
maxAllowedUseOnQuery: <int>
rowCount: <int>
nullRate: <float>
duplicateRate: <float>
columns: <Column[]>
- Column:
name: <string>
type: <string>
distinctCopyCount: <int>
```
Explanation:
```yaml
ddlOutFile: "file to write generated DDL commands"
commonColName: "name of the column that will be used as distribution column, filter column in restrictions and target column in selections"
targetTables: "array of tables that will be used in generated queries"
- Table:
name: "name prefix of table"
citusType: "citus type of table"
maxAllowedUseOnQuery: "limits how many times table can appear in query"
rowCount: "total # of rows that will be inserted into table"
nullRate: "percentage of null rows in rowCount that will be inserted into table"
duplicateRate: "percentage of duplicates in rowCount that will be inserted into table"
columns: "array of columns in table"
- Column:
name: "name of column"
type: "name of data type of column(only support 'int' now)"
distinctCopyCount: "how many tables with the same configuration we should create(only by changing full name, still using the same name prefix)"
```
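For illustration, the DDL emitted for the sample config's `DISTRIBUTED` table named `dist` (single `int` column, `distinctCopyCount: 2`) looks roughly like this (a sketch; actual output depends on your configuration):
```sql
-- ddls.sql (sketch): one block per generated table copy (dist0, dist1, ...)
DROP TABLE IF EXISTS dist0;
CREATE TABLE dist0(id int);
SELECT create_distributed_table('dist0','id');
```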
## Data Insertion Configuration
The tool generates data insertion commands if you want the tables filled with data. You can configure the total number of rows, what percentage of them
should be NULL, and what percentage should be duplicated. For the related options, see the Table schema in [DDL Configuration](#ddl-configuration). You
can also configure the range of the randomly generated data; see `dataRange` in [Query Configuration](#query-configuration).
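As a sketch, the insertion commands generated for the sample config's reference table `ref0` (with `rowCount: 10`, `nullRate: 0.1`, `duplicateRate: 0.1`, and `dataRange` 0 to 10) have roughly this shape; note that NULL rows are only generated for non-distributed tables:
```sql
INSERT INTO ref0 SELECT i FROM generate_series(0,10) i;   -- base rows shared by all tables
INSERT INTO ref0 SELECT i FROM generate_series(50,70) i;  -- rows unique to this table
INSERT INTO ref0 SELECT NULL FROM generate_series(0,1) i; -- NULL rows (nullRate * rowCount)
INSERT INTO ref0 SELECT * FROM ref0 ORDER BY id LIMIT 1;  -- duplicate rows (duplicateRate * rowCount)
```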
## Query Configuration
After generating the DDLs and data insertion commands, the tool generates queries; an example of a generated query is sketched after the explanation below.
Schema for Query configuration:
```yaml
queryCount: <int>
queryOutFile: <string>
semiAntiJoin: <bool>
cartesianProduct: <bool>
limit: <bool>
orderby: <bool>
forceOrderbyWithLimit: <bool>
useAvgAtTopLevelTarget: <bool>
dataRange:
from: <int>
to: <int>
filterRange:
from: <int>
to: <int>
limitRange:
from: <int>
to: <int>
targetRteCount: <int>
targetCteCount: <int>
targetCteRteCount: <int>
targetJoinTypes: <JoinType[]>
targetRteTypes: <RteType[]>
targetRestrictOps: <RestrictOp[]>
```
Explanation:
```yaml
queryCount: "number of queries to generate"
queryOutFile: "file to write generated queries"
semiAntiJoin: "should we support semi joins (WHERE col IN (Subquery))"
cartesianProduct: "should we support cartesian joins"
limit: "should we support limit clause"
orderby: "should we support order by clause"
forceOrderbyWithLimit: "should we force order by when we use limit"
useAvgAtTopLevelTarget: "should we make top level query as select avg() from (subquery)"
dataRange:
from: "starting boundary for data generation"
to: "end boundary for data generation"
filterRange:
from: "starting boundary for restriction clause"
to: "end boundary for restriction clause"
limitRange:
from: "starting boundary for limit clause"
to: "end boundary for data limit clause"
targetRteCount: "limits how many rtes should exist in non-cte part of the query"
targetCteCount: "limits how many ctes should exist in query"
targetCteRteCount: "limits how many rtes should exist in cte part of the query"
targetJoinTypes: "supported join types"
targetRteTypes: "supported rte types"
targetRestrictOps: "supported restrict ops"
```
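For example, with `useAvgAtTopLevelTarget`, `limit`, `orderby`, and `forceOrderbyWithLimit` enabled, a generated query might look roughly like the following (a hand-written sketch, not actual tool output; real queries are random and usually much deeper):
```sql
-- queryId: 1
SELECT avg(avgsub.id) FROM (
    SELECT t1.id FROM dist0 AS t1
    LEFT JOIN ref0 AS t2 ON t1.id = t2.id
    WHERE t2.id < 7
    ORDER BY 1
    LIMIT 3
) AS avgsub;
```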
## Misc Configuration
The tool has a few configuration options that do not fit into the three parts above.
Schema for misc configuration:
```yaml
interactiveMode: <bool>
```
Explanation:
```yaml
interactiveMode: "when true, interactively prints generated ddls and queries. Otherwise, it writes them to configured files."
```
## Supported Operations
The tool uses `commonColName` for any target-column selection required by the supported query clauses.
### Column Type Support
The tool currently supports only the `int` data type; support for other basic types is planned.
### Join Support
The tool supports the following join types:
```yaml
targetJoinTypes:
- INNER
- LEFT
- RIGHT
- FULL
```
### Citus Table Support
The tool supports the following Citus table types:
```yaml
targetTables:
- Table:
...
citusType: <one of (DISTRIBUTED || REFERENCE || POSTGRES)>
...
```
### Restrict Operation Support
The tool supports the following restrict operations:
```yaml
targetRestrictOps:
- LT
- GT
- EQ
```
### Rte Support
The tool supports the following RTE types:
```yaml
targetRteTypes:
- RELATION
- SUBQUERY
- CTE
- VALUES
```
## How to Generate Queries?
You have 2 different options.
- [Interactive Mode](#interactive-mode)
- [File Mode](#file-mode)
### Interactive Mode
In this mode, you will be prompted after each generated query. Press `Enter` to generate the next query,
or press `x` to exit.
1. Configure `interactiveMode: true` in config.yaml,
2. Run the command shown below
```bash
cd src/test/regress/citus_tests/query_generator
python generate_queries.py
```
### File Mode
In this mode, the generated DDLs and queries are written into the files configured in [config.yaml](./config/config.yaml).
1. Configure `interactiveMode: false`,
2. Configure `queryCount: <total_query>`,
3. Configure `queryOutFile: <query_file_path>` and `ddlOutFile: <ddlfile_path>`
4. Run the command shown below
```bash
cd src/test/regress/citus_tests/query_generator
python generate_queries.py
```

View File

@ -0,0 +1,60 @@
#!/bin/bash
# make bash behave
set -euo pipefail
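# usage: bash citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port>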
psql_user=$1
psql_db=$2
psql_port=$3
runDDLs()
{
# run ddls
psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" -f "${out_folder}"/ddls.sql > /dev/null
}
runUndistributeTables()
{
undistribute_all_tables_command='SELECT undistribute_table(logicalrelid) FROM pg_dist_partition;'
# run undistribute all tables
psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" -c "${undistribute_all_tables_command}" > /dev/null
}
runQueries()
{
out_filename=$1
# run dmls
# echo queries and comments for query tracing
psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" \
--echo-all \
-f "${out_folder}"/queries.sql > "${out_filename}" 2>&1
}
showDiffs()
{
pushd . && cd "${script_folder}" && python3 diff-checker.py && popd
}
# run query generator and let it create output ddls and queries
script_folder=$(dirname "$0")
out_folder="${script_folder}"/../out
pushd . && cd "${script_folder}"/.. && python3 generate_queries.py && popd
# remove result files if they exist
rm -rf "${out_folder}"/dist_queries.out "${out_folder}"/local_queries.out
# run ddls
runDDLs
# runs dmls for distributed tables
runQueries "${out_folder}"/dist_queries.out
# undistribute all dist tables
runUndistributeTables
# runs the same dmls for pg local tables
runQueries "${out_folder}"/local_queries.out
# see diffs in results
showDiffs

View File

@ -0,0 +1,122 @@
import os
import re
import subprocess
import sys
def createPatternForFailedQueryBlock(acceptableErrors):
totalAcceptableOrders = len(acceptableErrors)
failedQueryBlockPattern = "-- queryId: [0-9]+\n.*\npsql:.*(?:"
for acceptableErrorIdx in range(totalAcceptableOrders):
failedQueryBlockPattern += acceptableErrors[acceptableErrorIdx]
if acceptableErrorIdx < totalAcceptableOrders - 1:
failedQueryBlockPattern += "|"
failedQueryBlockPattern += ")"
return failedQueryBlockPattern
def findFailedQueriesFromFile(queryOutFile, acceptableErrors):
failedQueryIds = []
outFileContent = ""
failedQueryBlockPattern = createPatternForFailedQueryBlock(acceptableErrors)
queryIdPattern = "queryId: ([0-9]+)"
with open(queryOutFile, "r") as f:
outFileContent = f.read()
failedQueryContents = re.findall(failedQueryBlockPattern, outFileContent)
failedQueryIds = [
re.search(queryIdPattern, failedQueryContent)[1]
for failedQueryContent in failedQueryContents
]
return failedQueryIds
def removeFailedQueryOutputFromFile(outFile, failedQueryIds):
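# drops every output block that belongs to a failed query id, starting at its
# '-- queryId: N' line and continuing until the next query id that did not fail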
if len(failedQueryIds) == 0:
return
distOutFileContentAsLines = []
with open(outFile, "r") as f:
distOutFileContentAsLines = f.readlines()
with open(outFile, "w") as f:
clear = False
nextIdx = 0
nextQueryIdToDelete = failedQueryIds[nextIdx]
queryIdPattern = "queryId: ([0-9]+)"
for line in distOutFileContentAsLines:
matched = re.search(queryIdPattern, line)
# found a line which contains a query id
if matched:
# query id matches with the next failed query's id
if nextQueryIdToDelete == matched[1]:
# clear lines until we find a successful query
clear = True
nextIdx += 1
if nextIdx < len(failedQueryIds):
nextQueryIdToDelete = failedQueryIds[nextIdx]
else:
# we found a successful query
clear = False
if not clear:
f.write(line)
return
def removeFailedQueryOutputFromFiles(distQueryOutFile, localQueryOutFile):
# remove the failed distributed queries from both the local and distributed query output files to prevent diffs for acceptable errors
# some of the generated queries fail with the errors below due to https://github.com/citusdata/citus/issues/6653; we skip those until we support them
acceptableErrors = [
"ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns",
"ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns",
]
failedDistQueryIds = findFailedQueriesFromFile(distQueryOutFile, acceptableErrors)
removeFailedQueryOutputFromFile(distQueryOutFile, failedDistQueryIds)
removeFailedQueryOutputFromFile(localQueryOutFile, failedDistQueryIds)
return
def showDiffs(distQueryOutFile, localQueryOutFile, diffFile):
exitCode = 1
with open(diffFile, "w") as f:
diffCommand = "diff -u {} {}".format(localQueryOutFile, distQueryOutFile)
process = subprocess.Popen(diffCommand.split(), stdout=f, stderr=f, shell=False)
process.wait()
exitCode = process.returncode
print("diff exit {}".format(exitCode))
return exitCode
def exitIfAnyLocalQueryFailed(localQueryOutFile):
allErrors = ["ERROR:"]
failedLocalQueryIds = findFailedQueriesFromFile(localQueryOutFile, allErrors)
assert (
len(failedLocalQueryIds) == 0
), """There might be an internal error related to query generator or
we might find a Postgres bug. Check local_queries.out to see the error."""
return
if __name__ == "__main__":
scriptDirPath = os.path.dirname(os.path.abspath(__file__))
outFolderPath = scriptDirPath + "/../out"
localQueryOutFile = "{}/local_queries.out".format(outFolderPath)
distQueryOutFile = "{}/dist_queries.out".format(outFolderPath)
diffFile = "{}/local_dist.diffs".format(outFolderPath)
# exit if we have any error from local queries
exitIfAnyLocalQueryFailed(localQueryOutFile)
# find failed queries from distQueryOutFile and then remove failed queries and their results from
# both distQueryOutFile and localQueryOutFile
removeFailedQueryOutputFromFiles(distQueryOutFile, localQueryOutFile)
# show diffs in unified format
exitCode = showDiffs(distQueryOutFile, localQueryOutFile, diffFile)
sys.exit(exitCode)

View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""query_gen_test
Usage:
run_query_compare_test --bindir=<bindir> --pgxsdir=<pgxsdir>
Options:
--bindir=<bindir> PostgreSQL executable directory (ex: '~/.pgenv/pgsql-10.4/bin')
--pgxsdir=<pgxsdir> Path to the PGXS directory (ex: ~/.pgenv/src/postgresql-11.3)
"""
import os
import subprocess
import sys
from docopt import docopt
# https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time/14132912#14132912
sys.path.append(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
)
# ignore E402 because these imports require addition to path
import common # noqa: E402
import config as cfg # noqa: E402
def run_test(config):
# start cluster
common.initialize_temp_dir(cfg.CITUS_ARBITRARY_TEST_DIR)
common.initialize_citus_cluster(
config.bindir, config.datadir, config.settings, config
)
# run test
scriptDirPath = os.path.dirname(os.path.abspath(__file__))
testRunCommand = "bash {}/citus_compare_dist_local_joins.sh {} {} {}".format(
scriptDirPath, config.user, config.dbname, config.coordinator_port()
)
process = subprocess.Popen(
testRunCommand.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
stdout, stderr = process.communicate()
# stop cluster
common.stop_databases(
config.bindir, config.datadir, config.node_name_to_ports, config.name
)
print(stdout)
print(stderr)
print(process.returncode)
sys.exit(process.returncode)
if __name__ == "__main__":
citusClusterConfig = cfg.CitusSuperUserDefaultClusterConfig(
docopt(__doc__, version="run_query_compare_test")
)
run_test(citusClusterConfig)

View File

@ -0,0 +1,129 @@
import copy
import yaml
from config.config_parser import (
parseJoinTypeArray,
parseRange,
parseRestrictOpArray,
parseRteTypeArray,
parseTableArray,
)
from node_defs import CitusType
class Config:
def __init__(self):
configObj = Config.parseConfigFile("config/config.yaml")
self.targetTables = _distinctCopyTables(
parseTableArray(configObj["targetTables"])
)
self.targetJoinTypes = parseJoinTypeArray(configObj["targetJoinTypes"])
self.targetRteTypes = parseRteTypeArray(configObj["targetRteTypes"])
self.targetRestrictOps = parseRestrictOpArray(configObj["targetRestrictOps"])
self.commonColName = configObj["commonColName"]
self.targetRteCount = configObj["targetRteCount"]
self.targetCteCount = configObj["targetCteCount"]
self.targetCteRteCount = configObj["targetCteRteCount"]
self.semiAntiJoin = configObj["semiAntiJoin"]
self.cartesianProduct = configObj["cartesianProduct"]
self.limit = configObj["limit"]
self.orderby = configObj["orderby"]
self.forceOrderbyWithLimit = configObj["forceOrderbyWithLimit"]
self.useAvgAtTopLevelTarget = configObj["useAvgAtTopLevelTarget"]
self.interactiveMode = configObj["interactiveMode"]
self.queryOutFile = configObj["queryOutFile"]
self.ddlOutFile = configObj["ddlOutFile"]
self.queryCount = configObj["queryCount"]
self.dataRange = parseRange(configObj["dataRange"])
self.filterRange = parseRange(configObj["filterRange"])
self.limitRange = parseRange(configObj["limitRange"])
self._ensureRteLimitsAreSane()
# print(self)
def __repr__(self):
rep = "targetRteCount: {}\n".format(self.targetRteCount)
rep += "targetCteCount: {}\n".format(self.targetCteCount)
rep += "targetCteRteCount: {}\n".format(self.targetCteRteCount)
rep += "targetRteTypes:\n"
for rteType in self.targetRteTypes:
rep += "\t{}\n".format(rteType)
rep += "targetJoinTypes:\n"
for joinType in self.targetJoinTypes:
rep += "\t{}\n".format(joinType)
rep += "restrictOps:\n"
for restrictOp in self.targetRestrictOps:
rep += "\t{}\n".format(restrictOp)
return rep
def _ensureRteLimitsAreSane(self):
totalAllowedUseForAllTables = 0
for table in self.targetTables:
totalAllowedUseForAllTables += table.maxAllowedUseOnQuery
assert (
totalAllowedUseForAllTables >= self.targetRteCount
), """targetRteCount cannot be greater than sum of maxAllowedUseOnQuery for all tables.
Set targetRteCount to a lower value or increase maxAllowedUseOnQuery for tables at config.yml."""
@staticmethod
def parseConfigFile(path):
try:
with open(path, "r") as configFile:
return yaml.load(configFile, yaml.Loader)
except IOError as e:
print(f"I/O error({0}): {1}".format(e.errno, e.strerror))
raise BaseException("cannot parse config.yaml")
except Exception:
print("Unexpected error while parsing config.yml.")
_config = None
def resetConfig():
global _config
_config = Config()
def getConfig():
return _config
def getAllTableNames():
"""returns table names from target tables given at config"""
tables = getConfig().targetTables
tableNames = [table.name for table in tables]
return tableNames
def getMaxAllowedCountForTable(tableName):
tables = getConfig().targetTables
filtered = filter(lambda el: el.name == tableName, tables)
filtered = list(filtered)
assert len(filtered) == 1
return filtered[0].maxAllowedUseOnQuery
def isTableDistributed(table):
return table.citusType == CitusType.DISTRIBUTED
def isTableReference(table):
return table.citusType == CitusType.REFERENCE
def _distinctCopyTables(tables):
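"""creates distinctCopyCount copies of each configured table by appending a numeric
suffix, e.g. a table named dist with distinctCopyCount 2 becomes dist0 and dist1"""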
distinctCopyTables = []
for table in tables:
distinctCopyCount = table.distinctCopyCount
for tblIdx in range(1, distinctCopyCount):
distinctCopyTable = copy.deepcopy(table)
distinctCopyTable.name += str(tblIdx)
distinctCopyTables.append(distinctCopyTable)
table.name += "0"
tables.extend(distinctCopyTables)
return tables

View File

@ -0,0 +1,65 @@
interactiveMode: false
queryCount: 250
queryOutFile: queries.sql
ddlOutFile: ddls.sql
semiAntiJoin: true
cartesianProduct: false
limit: true
orderby: true
forceOrderbyWithLimit: true
useAvgAtTopLevelTarget: true
dataRange:
from: 0
to: 10
filterRange:
from: 0
to: 10
limitRange:
from: 0
to: 10
targetRteCount: 40
targetCteCount: 3
targetCteRteCount: 2
commonColName: id
targetTables:
- Table:
name: dist
citusType: DISTRIBUTED
maxAllowedUseOnQuery: 10
rowCount: 10
nullRate: 0.1
duplicateRate: 0.1
columns:
- Column:
name: id
type: int
distinctCopyCount: 2
- Table:
name: ref
citusType: REFERENCE
maxAllowedUseOnQuery: 10
rowCount: 10
nullRate: 0.1
duplicateRate: 0.1
columns:
- Column:
name: id
type: int
distinctCopyCount: 2
targetJoinTypes:
- INNER
- LEFT
- RIGHT
- FULL
targetRteTypes:
- RELATION
- SUBQUERY
- CTE
targetRestrictOps:
- LT
- GT

View File

@ -0,0 +1,81 @@
from node_defs import CitusType, Column, JoinType, RestrictOp, RTEType, Table
def parseJoinType(joinTypeText):
return JoinType[joinTypeText]
def parseJoinTypeArray(joinTypeTexts):
joinTypes = []
for joinTypeText in joinTypeTexts:
joinType = parseJoinType(joinTypeText)
joinTypes.append(joinType)
return joinTypes
def parseRteType(rteTypeText):
return RTEType[rteTypeText]
def parseRteTypeArray(rteTypeTexts):
rteTypes = []
for rteTypeText in rteTypeTexts:
rteType = parseRteType(rteTypeText)
rteTypes.append(rteType)
return rteTypes
def parseRestrictOp(restrictOpText):
return RestrictOp[restrictOpText]
def parseRestrictOpArray(restrictOpTexts):
restrictOps = []
for restrictOpText in restrictOpTexts:
restrictOp = parseRestrictOp(restrictOpText)
restrictOps.append(restrictOp)
return restrictOps
def parseTable(targetTableDict):
name = targetTableDict["name"]
citusType = CitusType[targetTableDict["citusType"]]
maxAllowedUseOnQuery = targetTableDict["maxAllowedUseOnQuery"]
rowCount = targetTableDict["rowCount"]
nullRate = targetTableDict["nullRate"]
duplicateRate = targetTableDict["duplicateRate"]
columns = []
for columnDict in targetTableDict["columns"]:
col = parseColumn(columnDict)
columns.append(col)
distinctCopyCount = targetTableDict["distinctCopyCount"]
return Table(
name,
citusType,
maxAllowedUseOnQuery,
rowCount,
nullRate,
duplicateRate,
columns,
distinctCopyCount,
)
def parseTableArray(targetTableDicts):
tables = []
for targetTableDict in targetTableDicts:
table = parseTable(targetTableDict["Table"])
tables.append(table)
return tables
def parseColumn(targetColumnDict):
name = targetColumnDict["name"]
type = targetColumnDict["type"]
return Column(name, type)
def parseRange(rangeDict):
fromVal = rangeDict["from"]
toVal = rangeDict["to"]
return (fromVal, toVal)

View File

@ -0,0 +1,81 @@
from node_defs import CitusType
from config.config import getConfig
def getTableData():
dataGenerationSql = ""
tableIdx = 1
(fromVal, toVal) = getConfig().dataRange
tables = getConfig().targetTables
for table in tables:
# generate base rows
dataGenerationSql += _genOverlappingData(table.name, fromVal, table.rowCount)
dataGenerationSql += _genNonOverlappingData(table.name, toVal, tableIdx)
dataGenerationSql += "\n"
# generate null rows
if not table.citusType == CitusType.DISTRIBUTED:
targetNullRows = int(table.rowCount * table.nullRate)
dataGenerationSql += _genNullData(table.name, targetNullRows)
dataGenerationSql += "\n"
# generate duplicate rows
targetDuplicateRows = int(table.rowCount * table.duplicateRate)
dataGenerationSql += _genDupData(table.name, targetDuplicateRows)
dataGenerationSql += "\n\n"
tableIdx += 1
return dataGenerationSql
def _genOverlappingData(tableName, startVal, rowCount):
"""returns string to fill table with [startVal,startVal+rowCount] range of integers"""
dataGenerationSql = ""
dataGenerationSql += "INSERT INTO " + tableName
dataGenerationSql += (
" SELECT i FROM generate_series("
+ str(startVal)
+ ","
+ str(startVal + rowCount)
+ ") i;"
)
return dataGenerationSql
def _genNullData(tableName, nullCount):
"""returns string to fill table with NULLs"""
dataGenerationSql = ""
dataGenerationSql += "INSERT INTO " + tableName
dataGenerationSql += (
" SELECT NULL FROM generate_series(0," + str(nullCount) + ") i;"
)
return dataGenerationSql
def _genDupData(tableName, dupRowCount):
"""returns string to fill table with duplicate integers which are fetched from given table"""
dataGenerationSql = ""
dataGenerationSql += "INSERT INTO " + tableName
dataGenerationSql += (
" SELECT * FROM "
+ tableName
+ " ORDER BY "
+ getConfig().commonColName
+ " LIMIT "
+ str(dupRowCount)
+ ";"
)
return dataGenerationSql
def _genNonOverlappingData(tableName, startVal, tableIdx):
"""returns string to fill table with different integers for given table"""
startVal = startVal + tableIdx * 20
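# give each table its own 20-wide band so the non-overlapping rows differ per table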
endVal = startVal + 20
dataGenerationSql = ""
dataGenerationSql += "INSERT INTO " + tableName
dataGenerationSql += (
" SELECT i FROM generate_series(" + str(startVal) + "," + str(endVal) + ") i;"
)
return dataGenerationSql
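# As an illustration (hypothetical table name and row counts; commonColName assumed to be "id"),
# the helpers above emit plain INSERT statements such as:
#   _genOverlappingData("dist_table", 0, 20)
#     -> "INSERT INTO dist_table SELECT i FROM generate_series(0,20) i;"
#   _genNullData("dist_table", 2)
#     -> "INSERT INTO dist_table SELECT NULL FROM generate_series(0,2) i;"
#   _genDupData("dist_table", 2)
#     -> "INSERT INTO dist_table SELECT * FROM dist_table ORDER BY id LIMIT 2;"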

View File

@ -0,0 +1,48 @@
from config.config import getConfig, isTableDistributed, isTableReference
def getTableDDLs():
ddls = ""
tables = getConfig().targetTables
for table in tables:
ddls += _genTableDDL(table)
ddls += "\n"
return ddls
def _genTableDDL(table):
ddl = ""
ddl += "DROP TABLE IF EXISTS " + table.name + ";"
ddl += "\n"
ddl += "CREATE TABLE " + table.name + "("
for column in table.columns[:-1]:
ddl += _genColumnDDL(column)
ddl += ",\n"
if len(table.columns) > 0:
ddl += _genColumnDDL(table.columns[-1])
ddl += ");\n"
if isTableDistributed(table):
ddl += (
"SELECT create_distributed_table("
+ "'"
+ table.name
+ "','"
+ getConfig().commonColName
+ "'"
+ ");"
)
ddl += "\n"
elif isTableReference(table):
ddl += "SELECT create_reference_table(" + "'" + table.name + "'" + ");"
ddl += "\n"
return ddl
def _genColumnDDL(column):
ddl = ""
ddl += column.name
ddl += " "
ddl += column.type
return ddl
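# Illustration (hypothetical names; commonColName assumed to be "id"): for a distributed table
# with a single int column, _genTableDDL() produces roughly:
#   DROP TABLE IF EXISTS dist_table;
#   CREATE TABLE dist_table(id int);
#   SELECT create_distributed_table('dist_table','id');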

View File

@ -0,0 +1,65 @@
import signal
import sys
from data_gen import getTableData
from ddl_gen import getTableDDLs
from query_gen import newQuery
from config.config import getConfig, resetConfig
def _signal_handler(sig, frame):
sys.exit(0)
def _interactiveMode(ddls, data):
print(ddls)
print(data)
while True:
res = input("Press x to exit or Enter to generate more")
if res.lower() == "x":
print("Exit from query generation mode!")
sys.exit(0)
query = newQuery()
print(query)
resetConfig()
def _fileMode(ddls, data):
ddlFileName = "out/" + getConfig().ddlOutFile
with open(ddlFileName, "w") as ddlFile:
ddlFile.writelines([ddls, data])
queryCount = getConfig().queryCount
fileName = "out/" + getConfig().queryOutFile
with open(fileName, "w") as f:
# enable repartition joins due to https://github.com/citusdata/citus/issues/6865
enableRepartitionJoinCommand = "SET citus.enable_repartition_joins TO on;\n"
queryLines = [enableRepartitionJoinCommand]
queryId = 1
for _ in range(queryCount):
query = newQuery()
queryLine = "-- queryId: " + str(queryId) + "\n"
queryLine += query + "\n\n"
queryLines.append(queryLine)
queryId += 1
f.writelines(queryLines)
if __name__ == "__main__":
signal.signal(signal.SIGINT, _signal_handler)
resetConfig()
ddls = getTableDDLs()
data = getTableData()
if getConfig().interactiveMode:
_interactiveMode(ddls, data)
else:
_fileMode(ddls, data)

View File

@ -0,0 +1,55 @@
from enum import Enum
class JoinType(Enum):
INNER = 1
LEFT = 2
RIGHT = 3
FULL = 4
class RTEType(Enum):
RELATION = 1
SUBQUERY = 2
CTE = 3
VALUES = 4
class RestrictOp(Enum):
LT = 1
GT = 2
EQ = 3
class CitusType(Enum):
DISTRIBUTED = 1
REFERENCE = 2
POSTGRES = 3
class Table:
def __init__(
self,
name,
citusType,
maxAllowedUseOnQuery,
rowCount,
nullRate,
duplicateRate,
columns,
distinctCopyCount,
):
self.name = name
self.citusType = citusType
self.maxAllowedUseOnQuery = maxAllowedUseOnQuery
self.rowCount = rowCount
self.nullRate = nullRate
self.duplicateRate = duplicateRate
self.columns = columns
self.distinctCopyCount = distinctCopyCount
class Column:
def __init__(self, name, type):
self.name = name
self.type = type
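# Example (hypothetical values) showing the positional argument order expected by Table:
#   Table("dist_table", CitusType.DISTRIBUTED, 2, 20, 0.1, 0.1, [Column("id", "int")], 1)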

View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -0,0 +1,434 @@
import random
from node_defs import RTEType
from random_selections import (
randomJoinOp,
randomRestrictOp,
randomRteType,
shouldSelectThatBranch,
)
from config.config import getAllTableNames, getConfig, getMaxAllowedCountForTable
# query_gen.py generates a new query from the grammar rules below. It randomly chooses among the
# allowed rules to generate a query. Some important notes about query generation:
#
# - We enforce the max allowed number of uses for each table, as well as a global limit on the
#   total number of tables in a query.
# - Table names, restriction types and all other selections are chosen from the values provided by
#   the configuration file (config.yml).
# - The entry point for the generator is newQuery(); all other methods are internal. We pass a
#   context object named GeneratorContext to every internal method so that checks and generation
#   state are tracked via the context.
# - shouldSelectThatBranch() is a utility method to randomly choose a grammar rule. Some rules are
#   selected only if they are enabled in the configuration file.
# - We enforce table limits separately when we are inside the cte part of the query (see
#   targetCteRteCount). We also enforce a max number of ctes per query.
# - commonColName from the config is used in the SELECT and WHERE clauses.
# - useAvgAtTopLevelTarget is useful to return a single row as the query result. It also helps
#   track Citus query bugs via run_query_compare_test.py.
# - The '=' restriction is removed from the config by default so that queries return non-null
#   values most of the time.
# - 'RTE.VALUES' is also removed from the config for the same reason.
# - The filter range is kept the same as the data range for the same reason.
# - aliasStack in GeneratorContext is used to put the correct table aliases into the WHERE clause.
# grammar syntax
#
# ======Assumptions======
# 1. Tables have a common dist col
# 2. All operations execute on the common column for all tables.
#
# TODO: RTE_FUNCTION, RTE_TABLEFUNC
#
# ====SYNTAX====
# ===Nonterminals===
# Query
# SelectExpr
# FromExpr
# RestrictExpr
# RteList
# Rte
# SubqueryRte
# RelationRte
# JoinList
# JoinOp
# Using
# RestrictList
# Restrict
# CteRte
# CteList
# Cte
# ValuesRte
# Limit
# OrderBy
#
# ===Terminals===
# e 'SELECT' 'FROM' 'INNER JOIN' 'LEFT JOIN' 'RIGHT JOIN' 'FULL JOIN' 'WHERE' 'LIMIT' 'USING' 'WITH'
# 'ORDER BY' 'VALUES' 'IN' 'NOT' 'AS' '<' '>' '=' '*' ',' ';' '(' ')'
#
# ===Rules===
# Start -> Query ';' || 'WITH' CteList Query ';'
# Query -> SelectExpr FromExpr [OrderBy] [Limit] || 'SELECT' 'avg(avgsub.DistColName)' 'FROM' SubqueryRte 'AS avgsub'
# SelectExpr -> 'SELECT' 'curAlias()' '.' DistColName
# FromExpr -> 'FROM' (Rte JoinList JoinOp Rte Using || RteList) ['WHERE' 'nextRandomAlias()' '.' DistColName RestrictExpr]
# RestrictExpr -> ('<' || '>' || '=') Int || ['NOT'] 'IN' SubqueryRte
# JoinList -> JoinOp Rte Using JoinList || e
# Using -> 'USING' '(' DistColName ')'
# RteList -> Rte [, RteList] || Rte
# Rte -> SubqueryRte 'AS' 'nextRandomAlias()' || RelationRte 'AS' 'nextRandomAlias()' ||
# CteRte 'AS' 'nextRandomAlias()' || ValuesRte 'AS' 'nextRandomAlias()'
# SubqueryRte -> '(' Query ')'
# RelationRte -> 'nextRandomTableName()'
# CteRte -> 'randomCteName()'
# CteList -> Cte [',' CteList] || Cte
# Cte -> 'nextRandomAlias()' 'AS' '(' Query ')'
# ValuesRte -> '(' 'VALUES' '(' 'random()' ')' ')'
# JoinOp -> 'INNER JOIN' || 'LEFT JOIN' || 'RIGHT JOIN' || 'FULL JOIN'
# Limit -> 'LIMIT' 'random()'
# OrderBy -> 'ORDER BY' DistColName
# DistColName -> 'hardwired(get from config)'
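# For intuition, one hypothetical query this grammar can emit (assuming commonColName == "id",
# useAvgAtTopLevelTarget enabled, and a distributed table named dist_0 in config.yml):
#   SELECT count(*), avg(avgsub.id) FROM
#     ( SELECT table_0.id FROM dist_0 AS table_0 WHERE table_0.id < 10 ) AS avgsub;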
class GeneratorContext:
"""context to store some variables which should be available during generation"""
def __init__(self):
# each level's last table is used in WHERE clause for the level
self.aliasStack = []
        # tracks whether we are inside a cte, as we should not refer to a cte from inside a cte
self.insideCte = False
# total rtes in cte + non-cte parts
self.totalRteCount = 0
# rte count in non-cte part to enforce non-cte rte limit
self.currentRteCount = 0
# cte count to enforce cte limit
self.currentCteCount = 0
# rte count in cte part to enforce rte limit in cte
self.currentCteRteCount = 0
# rte counts per table to enforce rte limit per table
self.perTableRtes = {}
# tables which hit count limit
self.disallowedTables = set()
        # tracks whether avg() has already been used, so it is added only at the top-level select
self.usedAvg = False
def randomCteName(self):
"""returns a randomly selected cte name"""
randCteRef = random.randint(0, self.currentCteCount - 1)
return " cte_" + str(randCteRef)
def curAlias(self):
"""returns current alias name to be used for the current table"""
return " table_" + str(self.totalRteCount)
def curCteAlias(self):
"""returns current alias name to be used for the current cte"""
return " cte_" + str(self.currentCteCount)
    def hasAnyCte(self):
        """returns True if the context has at least one cte"""
        return self.currentCteCount > 0
    def canGenerateNewRte(self):
        """returns True if the context has not yet hit the allowed rte count"""
        return self.currentRteCount < getConfig().targetRteCount
    def canGenerateNewCte(self):
        """returns True if the context has not yet hit the allowed cte count"""
        return self.currentCteCount < getConfig().targetCteCount
    def canGenerateNewRteInsideCte(self):
        """returns True if the context has not yet hit the allowed rte count inside a cte"""
        return self.currentCteRteCount < getConfig().targetCteRteCount
def addAlias(self, alias):
"""adds given alias to the stack"""
self.aliasStack.append(alias)
def removeLastAlias(self):
"""removes the latest added alias from the stack"""
return self.aliasStack.pop()
def getRteNameEnforcingRteLimits(self):
"""returns rteName by enforcing rte count limits for tables"""
# do not enforce per table rte limit if we are inside cte
if self.insideCte:
rteName = random.choice(getAllTableNames())
return " " + rteName + " "
while True:
# keep trying to find random table by eliminating the ones which hit table limit
allowedNames = set(getAllTableNames()) - self.disallowedTables
assert len(allowedNames) > 0
rteName = random.choice(list(allowedNames))
# not yet added to rte count map, so we can allow the name
if rteName not in self.perTableRtes:
self.perTableRtes[rteName] = 0
break
# limit is not exceeded, so we can allow the name
if self.perTableRtes[rteName] < getMaxAllowedCountForTable(rteName):
break
else:
self.disallowedTables.add(rteName)
# increment rte count for the table name
self.perTableRtes[rteName] += 1
return " " + rteName + " "
def newQuery():
"""returns generated query"""
genCtx = GeneratorContext()
return _start(genCtx)
def _start(genCtx):
# Query ';' || 'WITH' CteList Query ';'
query = ""
if not genCtx.canGenerateNewCte() or shouldSelectThatBranch():
query += _genQuery(genCtx)
else:
genCtx.insideCte = True
query += " WITH "
query += _genCteList(genCtx)
genCtx.insideCte = False
query += _genQuery(genCtx)
query += ";"
return query
def _genQuery(genCtx):
# SelectExpr FromExpr [OrderBy] [Limit] || 'SELECT' 'avg(avgsub.DistColName)' 'FROM' SubqueryRte 'AS avgsub'
query = ""
if (
getConfig().useAvgAtTopLevelTarget
and not genCtx.insideCte
and not genCtx.usedAvg
):
genCtx.usedAvg = True
query += "SELECT "
query += "count(*), avg(avgsub." + getConfig().commonColName + ") FROM "
query += _genSubqueryRte(genCtx)
query += " AS avgsub"
else:
query += _genSelectExpr(genCtx)
query += _genFromExpr(genCtx)
choseOrderby = False
if getConfig().orderby and shouldSelectThatBranch():
query += _genOrderBy(genCtx)
choseOrderby = True
if getConfig().limit and shouldSelectThatBranch():
if not choseOrderby and getConfig().forceOrderbyWithLimit:
query += _genOrderBy(genCtx)
query += _genLimit(genCtx)
return query
def _genOrderBy(genCtx):
# 'ORDER BY' DistColName
query = ""
query += " ORDER BY "
query += getConfig().commonColName + " "
return query
def _genLimit(genCtx):
# 'LIMIT' 'random()'
query = ""
query += " LIMIT "
(fromVal, toVal) = getConfig().limitRange
query += str(random.randint(fromVal, toVal))
return query
def _genSelectExpr(genCtx):
# 'SELECT' 'curAlias()' '.' DistColName
query = ""
query += " SELECT "
commonColName = getConfig().commonColName
query += genCtx.curAlias() + "." + commonColName + " "
return query
def _genFromExpr(genCtx):
# 'FROM' (Rte JoinList JoinOp Rte Using || RteList) ['WHERE' 'nextRandomAlias()' '.' DistColName RestrictExpr]
query = ""
query += " FROM "
if shouldSelectThatBranch():
query += _genRte(genCtx)
query += _genJoinList(genCtx)
query += randomJoinOp()
query += _genRte(genCtx)
query += _genUsing(genCtx)
else:
query += _genRteList(genCtx)
alias = genCtx.removeLastAlias()
if shouldSelectThatBranch():
query += " WHERE "
query += alias + "." + getConfig().commonColName
query += _genRestrictExpr(genCtx)
return query
def _genRestrictExpr(genCtx):
# ('<' || '>' || '=') Int || ['NOT'] 'IN' '(' SubqueryRte ')'
query = ""
if (
not getConfig().semiAntiJoin
or not genCtx.canGenerateNewRte()
or shouldSelectThatBranch()
):
query += randomRestrictOp()
(fromVal, toVal) = getConfig().filterRange
query += str(random.randint(fromVal, toVal))
else:
if shouldSelectThatBranch():
query += " NOT"
query += " IN "
query += _genSubqueryRte(genCtx)
return query
def _genCteList(genCtx):
# Cte [',' CteList] || Cte
query = ""
if shouldSelectThatBranch():
query += _genCte(genCtx)
if not genCtx.canGenerateNewCte():
return query
query += ","
query += _genCteList(genCtx)
else:
query += _genCte(genCtx)
return query
def _genCte(genCtx):
# 'nextRandomAlias()' 'AS' '(' Query ')'
query = ""
query += genCtx.curCteAlias()
genCtx.currentCteCount += 1
query += " AS "
query += " ( "
query += _genQuery(genCtx)
query += " ) "
return query
def _genRteList(genCtx):
# RteList -> Rte [, RteList] || Rte
query = ""
if getConfig().cartesianProduct and shouldSelectThatBranch():
query += _genRte(genCtx)
if not genCtx.canGenerateNewRte():
return query
if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
return query
query += ","
query += _genRteList(genCtx)
else:
query += _genRte(genCtx)
return query
def _genJoinList(genCtx):
# JoinOp Rte Using JoinList || e
query = ""
if shouldSelectThatBranch():
if not genCtx.canGenerateNewRte():
return query
if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
return query
query += randomJoinOp()
query += _genRte(genCtx)
query += _genUsing(genCtx)
query += _genJoinList(genCtx)
return query
def _genUsing(genCtx):
# 'USING' '(' DistColName ')'
query = ""
query += " USING (" + getConfig().commonColName + " ) "
return query
def _genRte(genCtx):
# SubqueryRte as 'nextRandomAlias()' || RelationRte as 'curAlias()' ||
# CteRte as 'curAlias()' || ValuesRte 'AS' 'nextRandomAlias()'
alias = genCtx.curAlias()
modifiedAlias = None
if genCtx.insideCte:
genCtx.currentCteRteCount += 1
else:
genCtx.currentRteCount += 1
genCtx.totalRteCount += 1
    # do not dive further into a recursive subquery if we hit the rte limit; replace it with a relation rte
rteType = randomRteType()
if not genCtx.canGenerateNewRte():
rteType = RTEType.RELATION
    # do not dive further into a recursive subquery if we hit the rte-in-cte limit; replace it with a relation rte
if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
rteType = RTEType.RELATION
    # we cannot refer to a cte if we are inside one or if we do not have any cte
if (genCtx.insideCte or not genCtx.hasAnyCte()) and rteType == RTEType.CTE:
rteType = RTEType.RELATION
query = ""
if rteType == RTEType.SUBQUERY:
query += _genSubqueryRte(genCtx)
elif rteType == RTEType.RELATION:
query += _genRelationRte(genCtx)
elif rteType == RTEType.CTE:
query += _genCteRte(genCtx)
elif rteType == RTEType.VALUES:
query += _genValuesRte(genCtx)
modifiedAlias = alias + "(" + getConfig().commonColName + ") "
else:
raise BaseException("unknown RTE type")
query += " AS "
query += alias if not modifiedAlias else modifiedAlias
genCtx.addAlias(alias)
return query
def _genSubqueryRte(genCtx):
# '(' Query ')'
query = ""
query += " ( "
query += _genQuery(genCtx)
query += " ) "
return query
def _genRelationRte(genCtx):
# 'randomAllowedTableName()'
query = ""
query += genCtx.getRteNameEnforcingRteLimits()
return query
def _genCteRte(genCtx):
# 'randomCteName()'
query = ""
query += genCtx.randomCteName()
return query
def _genValuesRte(genCtx):
# '( VALUES(random()) )'
query = ""
(fromVal, toVal) = getConfig().dataRange
query += " ( VALUES(" + str(random.randint(fromVal, toVal)) + " ) ) "
return query

View File

@ -0,0 +1,39 @@
import random
from node_defs import RestrictOp
from config.config import getConfig
def shouldSelectThatBranch():
"""returns 0 or 1 randomly"""
return random.randint(0, 1)
def randomRteType():
"""returns a randomly selected RteType given at config"""
rtes = getConfig().targetRteTypes
return random.choice(rtes)
def randomJoinOp():
"""returns a randomly selected JoinOp given at config"""
joinTypes = getConfig().targetJoinTypes
return " " + random.choice(joinTypes).name + " JOIN"
def randomRestrictOp():
"""returns a randomly selected RestrictOp given at config"""
restrictOps = getConfig().targetRestrictOps
restrictOp = random.choice(restrictOps)
opText = ""
if restrictOp == RestrictOp.LT:
opText = "<"
elif restrictOp == RestrictOp.GT:
opText = ">"
elif restrictOp == RestrictOp.EQ:
opText = "="
else:
raise BaseException("Unknown restrict op")
return " " + opText + " "

View File

@ -15,7 +15,7 @@ from typing import Optional
 import common
 from common import REGRESS_DIR, capture, run
-from config import ARBITRARY_SCHEDULE_NAMES, MASTER_VERSION, CitusDefaultClusterConfig
+from config import ARBITRARY_SCHEDULE_NAMES, MASTER_VERSION, CitusBaseClusterConfig
 def main():
@ -178,7 +178,7 @@ def run_schedule_with_python(schedule):
         "--bindir": bindir,
     }
-    config = CitusDefaultClusterConfig(args)
+    config = CitusBaseClusterConfig(args)
     common.initialize_temp_dir(config.temp_dir)
     common.initialize_citus_cluster(
         config.bindir, config.datadir, config.settings, config
@ -242,7 +242,7 @@ def default_base_schedule(test_schedule, args):
         return None
     if "pg_upgrade" in test_schedule:
-        return "minimal_schedule"
+        return "minimal_pg_upgrade_schedule"
     if test_schedule in ARBITRARY_SCHEDULE_NAMES:
         print(f"WARNING: Arbitrary config schedule ({test_schedule}) is not supported.")
@ -301,9 +301,21 @@ def test_dependencies(test_name, test_schedule, schedule_line, args):
     if schedule_line_is_upgrade_after(schedule_line):
         # upgrade_xxx_after tests always depend on upgrade_xxx_before
+        test_names = schedule_line.split()[1:]
+        before_tests = []
+        # _after tests have implicit dependencies on _before tests
+        for test_name in test_names:
+            if "_after" in test_name:
+                before_tests.append(test_name.replace("_after", "_before"))
+        # the upgrade_columnar_before renames the schema, on which other
+        # "after" tests depend. So we make sure to execute it always.
+        if "upgrade_columnar_before" not in before_tests:
+            before_tests.append("upgrade_columnar_before")
         return TestDeps(
             default_base_schedule(test_schedule, args),
-            [test_name.replace("_after", "_before")],
+            before_tests,
         )
     # before_ tests leave stuff around on purpose for the after tests. So they

View File

@ -9,31 +9,31 @@ SELECT * FROM pg_indexes WHERE schemaname = 'upgrade_basic' and tablename NOT LI
 upgrade_basic | tp | tp_pkey | | CREATE UNIQUE INDEX tp_pkey ON upgrade_basic.tp USING btree (a)
 (3 rows)
-SELECT nextval('pg_dist_shardid_seq') = MAX(shardid)+1 FROM pg_dist_shard;
+SELECT nextval('pg_dist_shardid_seq') > MAX(shardid) FROM pg_dist_shard;
 ?column?
 ---------------------------------------------------------------------
 t
 (1 row)
-SELECT nextval('pg_dist_placement_placementid_seq') = MAX(placementid)+1 FROM pg_dist_placement;
+SELECT nextval('pg_dist_placement_placementid_seq') > MAX(placementid) FROM pg_dist_placement;
 ?column?
 ---------------------------------------------------------------------
 t
 (1 row)
-SELECT nextval('pg_dist_groupid_seq') = MAX(groupid)+1 FROM pg_dist_node;
+SELECT nextval('pg_dist_groupid_seq') > MAX(groupid) FROM pg_dist_node;
 ?column?
 ---------------------------------------------------------------------
 t
 (1 row)
-SELECT nextval('pg_dist_node_nodeid_seq') = MAX(nodeid)+1 FROM pg_dist_node;
+SELECT nextval('pg_dist_node_nodeid_seq') > MAX(nodeid) FROM pg_dist_node;
 ?column?
 ---------------------------------------------------------------------
 t
 (1 row)
-SELECT nextval('pg_dist_colocationid_seq') = MAX(colocationid)+1 FROM pg_dist_colocation;
+SELECT nextval('pg_dist_colocationid_seq') > MAX(colocationid) FROM pg_dist_colocation;
 ?column?
 ---------------------------------------------------------------------
 t
@ -45,7 +45,7 @@ SELECT nextval('pg_dist_colocationid_seq') = MAX(colocationid)+1 FROM pg_dist_co
 SELECT
 CASE WHEN MAX(operation_id) IS NULL
 THEN true
-ELSE nextval('pg_dist_operationid_seq') = MAX(operation_id)+1
+ELSE nextval('pg_dist_operationid_seq') > MAX(operation_id)
 END AS check_operationid
 FROM pg_dist_cleanup;
 check_operationid
@ -56,7 +56,7 @@ SELECT
 SELECT
 CASE WHEN MAX(record_id) IS NULL
 THEN true
-ELSE nextval('pg_dist_cleanup_recordid_seq') = MAX(record_id)+1
+ELSE nextval('pg_dist_cleanup_recordid_seq') > MAX(record_id)
 END AS check_recordid
 FROM pg_dist_cleanup;
 check_recordid
@ -93,8 +93,8 @@ SELECT sequence_name FROM information_schema.sequences
 'pg_dist_groupid_seq',
 'pg_dist_node_nodeid_seq',
 'pg_dist_colocationid_seq',
 'pg_dist_operationid_seq',
 'pg_dist_cleanup_recordid_seq',
 'pg_dist_background_job_job_id_seq',
 'pg_dist_background_task_task_id_seq',
 'pg_dist_clock_logical_seq'

View File

@ -0,0 +1 @@
test: multi_test_helpers multi_test_helpers_superuser multi_test_catalog_views

View File

@ -3,24 +3,24 @@ BEGIN;
 -- We have the tablename filter to avoid adding an alternative output for when the coordinator is in metadata vs when not
 SELECT * FROM pg_indexes WHERE schemaname = 'upgrade_basic' and tablename NOT LIKE 'r_%' ORDER BY tablename;
-SELECT nextval('pg_dist_shardid_seq') = MAX(shardid)+1 FROM pg_dist_shard;
+SELECT nextval('pg_dist_shardid_seq') > MAX(shardid) FROM pg_dist_shard;
-SELECT nextval('pg_dist_placement_placementid_seq') = MAX(placementid)+1 FROM pg_dist_placement;
+SELECT nextval('pg_dist_placement_placementid_seq') > MAX(placementid) FROM pg_dist_placement;
-SELECT nextval('pg_dist_groupid_seq') = MAX(groupid)+1 FROM pg_dist_node;
+SELECT nextval('pg_dist_groupid_seq') > MAX(groupid) FROM pg_dist_node;
-SELECT nextval('pg_dist_node_nodeid_seq') = MAX(nodeid)+1 FROM pg_dist_node;
+SELECT nextval('pg_dist_node_nodeid_seq') > MAX(nodeid) FROM pg_dist_node;
-SELECT nextval('pg_dist_colocationid_seq') = MAX(colocationid)+1 FROM pg_dist_colocation;
+SELECT nextval('pg_dist_colocationid_seq') > MAX(colocationid) FROM pg_dist_colocation;
 -- while testing sequences on pg_dist_cleanup, they return null in pg upgrade schedule
 -- but return a valid value in citus upgrade schedule
 -- that's why we accept both NULL and MAX()+1 here
 SELECT
 CASE WHEN MAX(operation_id) IS NULL
 THEN true
-ELSE nextval('pg_dist_operationid_seq') = MAX(operation_id)+1
+ELSE nextval('pg_dist_operationid_seq') > MAX(operation_id)
 END AS check_operationid
 FROM pg_dist_cleanup;
 SELECT
 CASE WHEN MAX(record_id) IS NULL
 THEN true
-ELSE nextval('pg_dist_cleanup_recordid_seq') = MAX(record_id)+1
+ELSE nextval('pg_dist_cleanup_recordid_seq') > MAX(record_id)
 END AS check_recordid
 FROM pg_dist_cleanup;
 SELECT nextval('pg_dist_background_job_job_id_seq') > COALESCE(MAX(job_id), 0) FROM pg_dist_background_job;