Query generator test tool (#6686)

- The query generator creates queries allowed by the grammar documented in `query_generator/query_gen.py` (currently joins only).
- This PR adds a CI test that uses the query generator to compare the results of generated queries executed on Citus tables and on local (undistributed) tables. It fails if there is an unexpected error in the results; the error can be related to Citus, the query generator, or even Postgres.
- The tool is configured via `query_generator/config/config.yaml`, which limits the number of tables in generated queries and sets many table-related parameters (e.g. row count).
- The run time of the CI task can be tuned from the config file. By default, we run 250 queries with at most 40 tables inside each query.
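For reference, the new check can also be run locally via the Makefile target added in this PR (a minimal sketch; it assumes a built Citus source tree):

```bash
# runs the query generator comparison test, the same target the CI job invokes
make -C src/test/regress check-query-generator
```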
pull/6829/head
aykut-bozkurt 2023-04-23 20:28:26 +03:00 committed by GitHub
parent 08e2820c67
commit a6a7271e63
20 changed files with 1695 additions and 116 deletions

View File

@ -6,7 +6,7 @@ orbs:
parameters:
image_suffix:
type: string
default: '-v3417e8d'
default: '-vabeb997'
pg13_version:
type: string
default: '13.10'
@ -421,6 +421,63 @@ jobs:
- coverage:
flags: 'test_<< parameters.pg_major >>,upgrade'
test-query-generator:
    description: Expects the generated queries to return the same results when run on distributed and local tables
parameters:
pg_major:
description: 'postgres major version'
type: integer
image:
        description: 'docker image to use for the tests'
type: string
default: citus/failtester
image_tag:
description: 'docker image tag to use'
type: string
docker:
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
working_directory: /home/circleci/project
steps:
- checkout
- attach_workspace:
at: .
- install_extension:
pg_major: << parameters.pg_major >>
- configure
- enable_core
- run:
name: 'Run Test'
command: |
gosu circleci make -C src/test/regress check-query-generator
no_output_timeout: 5m
- run:
name: 'Show regressions'
command: |
find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" -exec cat {} +
lines=$(find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" | wc -l)
if [ $lines -ne 0 ]; then
exit 1
fi
when: on_fail
- run:
name: 'Copy logfiles'
command: |
mkdir src/test/regress/tmp_citus_test/logfiles
find src/test/regress/tmp_citus_test/ -name "logfile_*" -exec cp -t src/test/regress/tmp_citus_test/logfiles/ {} +
when: on_fail
- store_artifacts:
name: 'Save logfiles'
path: src/test/regress/tmp_citus_test/logfiles
- store_artifacts:
name: 'Save ddls'
path: src/test/regress/citus_tests/query_generator/out/ddls.sql
- store_artifacts:
name: 'Save dmls'
path: src/test/regress/citus_tests/query_generator/out/queries.sql
- stack_trace
- coverage:
flags: 'test_<< parameters.pg_major >>,querygen'
test-citus:
description: Runs the common tests of citus
parameters:
@ -935,6 +992,24 @@ workflows:
image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15]
- test-query-generator:
name: 'test-13_check-query-generator'
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
requires: [build-13]
- test-query-generator:
name: 'test-14_check-query-generator'
pg_major: 14
image_tag: '<< pipeline.parameters.pg14_version >>'
requires: [build-14]
- test-query-generator:
name: 'test-15_check-query-generator'
pg_major: 15
image_tag: '<< pipeline.parameters.pg15_version >>'
requires: [build-15]
- test-pg-upgrade:
name: 'test-13-14_check-pg-upgrade'
old_pg_major: 13
@ -975,6 +1050,7 @@ workflows:
- test-13_check-enterprise-failure
- test-13_check-split
- test-13_check-arbitrary-configs
- test-13_check-query-generator
- test-14_check-multi
- test-14_check-multi-1
- test-14_check-mx
@ -993,6 +1069,7 @@ workflows:
- test-14_check-enterprise-failure
- test-14_check-split
- test-14_check-arbitrary-configs
- test-14_check-query-generator
- test-15_check-multi
- test-15_check-multi-1
- test-15_check-mx
@ -1011,6 +1088,7 @@ workflows:
- test-15_check-enterprise-failure
- test-15_check-split
- test-15_check-arbitrary-configs
- test-15_check-query-generator
- test-13-14_check-pg-upgrade
- test-14-15_check-pg-upgrade
- test-13_check-citus-upgrade

View File

@ -34,6 +34,7 @@ MULTI_REGRESS_OPTS = --inputdir=$(citus_abs_srcdir) $(pg_regress_locale_flags) -
pg_upgrade_check = $(citus_abs_srcdir)/citus_tests/upgrade/pg_upgrade_test.py
citus_upgrade_check =CITUS_OLD_VERSION=$(citus-old-version) $(citus_abs_srcdir)/citus_tests/upgrade/citus_upgrade_test.py
arbitrary_config_check = $(citus_abs_srcdir)/citus_tests/arbitrary_configs/citus_arbitrary_configs.py
query_generator_check = $(citus_abs_srcdir)/citus_tests/query_generator/bin/run_query_compare_test.py
template_isolation_files = $(shell find $(citus_abs_srcdir)/spec/ -name '*.spec')
generated_isolation_files = $(patsubst $(citus_abs_srcdir)/spec/%,$(citus_abs_srcdir)/build/specs/%,$(template_isolation_files))
@ -44,7 +45,7 @@ vanilla_diffs_file = $(citus_abs_srcdir)/pg_vanilla_outputs/$(MAJORVERSION)/regr
# intermediate, for muscle memory backward compatibility.
check: check-full check-enterprise-full
# check-full triggers all tests that ought to be run routinely
check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest
check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest check-query-generator
# check-enterprise-full triggers all enterprise specific tests
check-enterprise-full: check-enterprise check-enterprise-isolation check-enterprise-failure check-enterprise-isolation-logicalrep-1 check-enterprise-isolation-logicalrep-2 check-enterprise-isolation-logicalrep-3
@ -262,6 +263,9 @@ check-arbitrary-base: all
check-pytest:
pytest -n auto
check-query-generator: all
${query_generator_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir)
check-citus-upgrade: all
$(citus_upgrade_check) \
--bindir=$(bindir) \

View File

@ -15,6 +15,7 @@ pytest-asyncio = "*"
pytest-timeout = "*"
pytest-xdist = "*"
pytest-repeat = "*"
pyyaml = "*"
[dev-packages]
black = "*"

View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "eb9ca3a7b05e76c7ac60179a1755f89600dfb215e02bf08c258d548df1d96025"
"sha256": "9568b1f3e4d4fd408e5e263f6346b0a4f479ac88e02f64bb79a9d482096e6a03"
},
"pipfile-spec": 6,
"requires": {
@ -24,14 +24,6 @@
"markers": "python_version >= '3.6'",
"version": "==3.4.1"
},
"attrs": {
"hashes": [
"sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836",
"sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"
],
"markers": "python_version >= '3.6'",
"version": "==22.2.0"
},
"blinker": {
"hashes": [
"sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6"
@ -219,32 +211,28 @@
},
"cryptography": {
"hashes": [
"sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1",
"sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7",
"sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06",
"sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84",
"sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915",
"sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074",
"sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5",
"sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3",
"sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9",
"sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3",
"sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011",
"sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536",
"sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a",
"sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f",
"sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480",
"sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac",
"sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0",
"sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108",
"sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828",
"sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354",
"sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612",
"sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3",
"sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97"
"sha256:05dc219433b14046c476f6f09d7636b92a1c3e5808b9a6536adf4932b3b2c440",
"sha256:0dcca15d3a19a66e63662dc8d30f8036b07be851a8680eda92d079868f106288",
"sha256:142bae539ef28a1c76794cca7f49729e7c54423f615cfd9b0b1fa90ebe53244b",
"sha256:3daf9b114213f8ba460b829a02896789751626a2a4e7a43a28ee77c04b5e4958",
"sha256:48f388d0d153350f378c7f7b41497a54ff1513c816bcbbcafe5b829e59b9ce5b",
"sha256:4df2af28d7bedc84fe45bd49bc35d710aede676e2a4cb7fc6d103a2adc8afe4d",
"sha256:4f01c9863da784558165f5d4d916093737a75203a5c5286fde60e503e4276c7a",
"sha256:7a38250f433cd41df7fcb763caa3ee9362777fdb4dc642b9a349721d2bf47404",
"sha256:8f79b5ff5ad9d3218afb1e7e20ea74da5f76943ee5edb7f76e56ec5161ec782b",
"sha256:956ba8701b4ffe91ba59665ed170a2ebbdc6fc0e40de5f6059195d9f2b33ca0e",
"sha256:a04386fb7bc85fab9cd51b6308633a3c271e3d0d3eae917eebab2fac6219b6d2",
"sha256:a95f4802d49faa6a674242e25bfeea6fc2acd915b5e5e29ac90a32b1139cae1c",
"sha256:adc0d980fd2760c9e5de537c28935cc32b9353baaf28e0814df417619c6c8c3b",
"sha256:aecbb1592b0188e030cb01f82d12556cf72e218280f621deed7d806afd2113f9",
"sha256:b12794f01d4cacfbd3177b9042198f3af1c856eedd0a98f10f141385c809a14b",
"sha256:c0764e72b36a3dc065c155e5b22f93df465da9c39af65516fe04ed3c68c92636",
"sha256:c33c0d32b8594fa647d2e01dbccc303478e16fdd7cf98652d5b3ed11aa5e5c99",
"sha256:cbaba590180cba88cb99a5f76f90808a624f18b169b90a4abb40c1fd8c19420e",
"sha256:d5a1bd0e9e2031465761dfa920c16b0065ad77321d8a8c1f5ee331021fda65e9"
],
"index": "pypi",
"version": "==39.0.2"
"version": "==40.0.2"
},
"docopt": {
"hashes": [
@ -271,11 +259,11 @@
},
"filelock": {
"hashes": [
"sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce",
"sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182"
"sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9",
"sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"
],
"index": "pypi",
"version": "==3.10.0"
"version": "==3.12.0"
},
"flask": {
"hashes": [
@ -488,11 +476,11 @@
},
"packaging": {
"hashes": [
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2",
"sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"
"sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
"sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
],
"markers": "python_version >= '3.7'",
"version": "==23.0"
"version": "==23.1"
},
"passlib": {
"hashes": [
@ -553,21 +541,11 @@
},
"pyasn1": {
"hashes": [
"sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359",
"sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576",
"sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf",
"sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7",
"sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d",
"sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00",
"sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8",
"sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86",
"sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12",
"sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776",
"sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba",
"sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2",
"sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"
"sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57",
"sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"
],
"version": "==0.4.8"
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==0.5.0"
},
"pycparser": {
"hashes": [
@ -578,11 +556,11 @@
},
"pyopenssl": {
"hashes": [
"sha256:c1cc5f86bcacefc84dada7d31175cae1b1518d5f60d3d0bb595a67822a868a6f",
"sha256:df5fc28af899e74e19fccb5510df423581047e10ab6f1f4ba1763ff5fde844c0"
"sha256:841498b9bec61623b1b6c47ebbc02367c07d60e0e195f19790817f10cc8db0b7",
"sha256:9e0c526404a210df9d2b18cd33364beadb0dc858a739b885677bc65e105d4a4c"
],
"markers": "python_version >= '3.6'",
"version": "==23.0.0"
"version": "==23.1.1"
},
"pyparsing": {
"hashes": [
@ -600,19 +578,19 @@
},
"pytest": {
"hashes": [
"sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e",
"sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"
"sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362",
"sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"
],
"index": "pypi",
"version": "==7.2.2"
"version": "==7.3.1"
},
"pytest-asyncio": {
"hashes": [
"sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36",
"sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442"
"sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b",
"sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c"
],
"index": "pypi",
"version": "==0.20.3"
"version": "==0.21.0"
},
"pytest-repeat": {
"hashes": [
@ -638,6 +616,52 @@
"index": "pypi",
"version": "==3.2.1"
},
"pyyaml": {
"hashes": [
"sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
"sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
"sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
"sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b",
"sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4",
"sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07",
"sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba",
"sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9",
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
"sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
"sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
"sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
"sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
"sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
"sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
"sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
"sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
"sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
"sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
"sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
"sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
"sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
"sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
"sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
"sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
"sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
"sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
"sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
"sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
"sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
"sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
"sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
"sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
"sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
"sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
"sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
],
"index": "pypi",
"version": "==6.0"
},
"ruamel.yaml": {
"hashes": [
"sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33",
@ -705,20 +729,20 @@
},
"tornado": {
"hashes": [
"sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca",
"sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72",
"sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23",
"sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8",
"sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b",
"sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9",
"sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13",
"sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75",
"sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac",
"sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e",
"sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b"
"sha256:4546003dc8b5733489139d3bff5fa6a0211be505faf819bd9970e7c2b32e8122",
"sha256:4d349846931557b7ec92f224b5d598b160e2ba26ae1812480b42e9622c884bf7",
"sha256:6164571f5b9f73143d1334df4584cb9ac86d20c461e17b6c189a19ead8bb93c1",
"sha256:6cfff1e9c15c79e106b8352269d201f8fc0815914a6260f3893ca18b724ea94b",
"sha256:720f53e6367b38190ae7fa398c25c086c69d88b3c6535bd6021a126b727fb5cd",
"sha256:912df5712024564e362ecce43c8d5862e14c78c8dd3846c9d889d44fbd7f4951",
"sha256:c37b6a384d54ce6a31168d40ab21ad2591ddaf34973075cc0cad154402ecd9e8",
"sha256:c659ab04d5aa477dbe44152c67d93f3ad3243b992d94f795ca1d5c73c37337ce",
"sha256:c9114a61a4588c09065b9996ae05462350d17160b92b9bf9a1e93689cc0424dc",
"sha256:d68f3192936ff2c4add04dc21a436a43b4408d466746b78bb2b9d0a53a18683f",
"sha256:d7b737e18f701de3e4a3b0824260b4d740e4d60607b8089bb80e80ffd464780e"
],
"markers": "python_version >= '3.7'",
"version": "==6.2"
"markers": "python_version >= '3.8'",
"version": "==6.3"
},
"typing-extensions": {
"hashes": [
@ -808,42 +832,42 @@
"develop": {
"attrs": {
"hashes": [
"sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836",
"sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"
"sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04",
"sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"
],
"markers": "python_version >= '3.6'",
"version": "==22.2.0"
"markers": "python_version >= '3.7'",
"version": "==23.1.0"
},
"black": {
"hashes": [
"sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd",
"sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555",
"sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481",
"sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468",
"sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9",
"sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a",
"sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958",
"sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580",
"sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26",
"sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32",
"sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8",
"sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753",
"sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b",
"sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074",
"sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651",
"sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24",
"sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6",
"sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad",
"sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac",
"sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221",
"sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06",
"sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27",
"sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648",
"sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739",
"sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104"
"sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5",
"sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915",
"sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326",
"sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940",
"sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b",
"sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30",
"sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c",
"sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c",
"sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab",
"sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27",
"sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2",
"sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961",
"sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9",
"sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb",
"sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70",
"sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331",
"sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2",
"sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266",
"sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d",
"sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6",
"sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b",
"sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925",
"sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8",
"sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4",
"sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"
],
"index": "pypi",
"version": "==23.1.0"
"version": "==23.3.0"
},
"click": {
"hashes": [
@ -863,11 +887,11 @@
},
"flake8-bugbear": {
"hashes": [
"sha256:beb5c7efcd7ccc2039ef66a77bb8db925e7be3531ff1cb4d0b7030d0e2113d72",
"sha256:e3e7f74c8a49ad3794a7183353026dabd68c74030d5f46571f84c1fb0eb79363"
"sha256:8a218d13abd6904800970381057ce8e72cde8eea743240c4ef3ede4dd0bc9cfb",
"sha256:ea565bdb87b96b56dc499edd6cc3ba7f695373d902a5f56c989b74fad7c7719d"
],
"index": "pypi",
"version": "==23.3.12"
"version": "==23.3.23"
},
"isort": {
"hashes": [
@ -895,11 +919,11 @@
},
"packaging": {
"hashes": [
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2",
"sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"
"sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
"sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
],
"markers": "python_version >= '3.7'",
"version": "==23.0"
"version": "==23.1"
},
"pathspec": {
"hashes": [
@ -911,11 +935,11 @@
},
"platformdirs": {
"hashes": [
"sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa",
"sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8"
"sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08",
"sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e"
],
"markers": "python_version >= '3.7'",
"version": "==3.1.1"
"version": "==3.2.0"
},
"pycodestyle": {
"hashes": [

View File

@ -15,6 +15,8 @@ WORKER2 = "worker2"
REGULAR_USER_NAME = "regularuser"
SUPER_USER_NAME = "postgres"
DATABASE_NAME = "postgres"
ARBITRARY_SCHEDULE_NAMES = [
"create_schedule",
"sql_schedule",
@ -96,6 +98,7 @@ class CitusBaseClusterConfig(object, metaclass=NewInitCaller):
self.temp_dir = CITUS_ARBITRARY_TEST_DIR
self.worker_amount = 2
self.user = REGULAR_USER_NAME
self.dbname = DATABASE_NAME
self.is_mx = True
self.is_citus = True
self.name = type(self).__name__

View File

@ -0,0 +1,2 @@
__pycache__
*-env

View File

@ -0,0 +1,223 @@
## Goal of the Tool
The tool supports a simple syntax that is useful for generating queries with different join orders. The main motivation for creating the tool was to compare the results of generated queries between different [Citus](https://github.com/citusdata/citus) tables and Postgres tables; that is why only a basic syntax is supported for now. It can be extended to support different kinds of queries.
## Query Generator for Postgres
The tool generates SELECT queries, whose depth can be configured, with different join orders. It also generates the DDL commands required for query execution.
You can also tweak the configuration parameters for the generation of data insertion commands.
## How to Run Citus Join Verification?
You can verify whether Citus breaks any default PG join behaviour via `citus_compare_dist_local_joins.sh`. It creates the
tables specified in the config. Then, it runs the generated queries on those tables and saves the results into `out/dist_queries.out`.
After running those queries on the Citus tables, it creates PG tables with the same names as in the previous run, executes the same
queries, and saves the results into `out/local_queries.out`. In the final step, it generates a diff between the local and distributed results.
You can check the contents of `out/local_dist_diffs` to see if there is any query that Citus does not support.
1. Create a Citus local cluster with 2 workers by using the [citus_dev](https://github.com/citusdata/tools/tree/develop/citus_dev) tool.
(Note: make sure you do not configure psql via a `.psqlrc` file, as it would make the test fail.)
```bash
citus_dev make testCluster --destroy
```
2. Run the test:
```bash
cd src/test/regress/citus_tests/query_generator/bin
bash citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port>
```
3. See the diff content in `src/test/regress/citus_tests/query_generator/out/local_dist_diffs`
### Configuration
You can configure 3 different parts:
- [DDL Configuration](#ddl-configuration)
- [Data Insertion Configuration](#data-insertion-configuration)
- [Query Configuration](#query-configuration)
## DDL Configuration
The tool generates the related DDL commands before generating the queries.
Schema for DDL configuration:
```yaml
ddlOutFile: <string>
commonColName: <string>
targetTables: <Table[]>
- Table:
name: <string>
citusType: <CitusType>
maxAllowedUseOnQuery: <int>
rowCount: <int>
nullRate: <float>
duplicateRate: <float>
columns: <Column[]>
- Column:
name: <string>
type: <string>
distinctCopyCount: <int>
```
Explanation:
```yaml
ddlOutFile: "file to write generated DDL commands"
commonColName: "name of the column that will be used as distribution column, filter column in restrictions and target column in selections"
targetTables: "array of tables that will be used in generated queries"
- Table:
name: "name prefix of table"
citusType: "citus type of table"
maxAllowedUseOnQuery: "limits how many times table can appear in query"
rowCount: "total # of rows that will be inserted into table"
nullRate: "percentage of null rows in rowCount that will be inserted into table"
duplicateRate: "percentage of duplicates in rowCount that will be inserted into table"
columns: "array of columns in table"
- Column:
name: "name of column"
type: "name of data type of column(only support 'int' now)"
distinctCopyCount: "how many tables with the same configuration we should create(only by changing full name, still using the same name prefix)"
```
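For example, the default `config.yaml` in this PR describes a distributed table as follows:

```yaml
targetTables:
  - Table:
      name: dist
      citusType: DISTRIBUTED
      maxAllowedUseOnQuery: 10
      rowCount: 10
      nullRate: 0.1
      duplicateRate: 0.1
      columns:
        - Column:
            name: id
            type: int
      distinctCopyCount: 2
```

With `distinctCopyCount: 2`, this entry produces two tables, `dist0` and `dist1`.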
## Data Insertion Configuration
The tool generates data insertion commands if you want the tables filled with data. You can configure the total number of rows, what percentage of them should
be NULL, and what percentage of them should be duplicated. For the related configuration, see the Table schema in [DDL Configuration](#ddl-configuration). You
can also configure the range of the randomly generated data. See `dataRange` in [Query Configuration](#query-configuration).
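For example, with `rowCount: 10`, `nullRate: 0.1` and `duplicateRate: 0.1` on a reference table named `ref0`, the generated commands look roughly like the sketch below (based on `data_gen.py` in this PR; an extra non-overlapping insert whose range depends on the table's index is omitted here, and NULL rows are skipped for distributed tables):

```sql
INSERT INTO ref0 SELECT i FROM generate_series(0,10) i;    -- base rows
INSERT INTO ref0 SELECT NULL FROM generate_series(0,1) i;  -- int(10 * 0.1) NULL rows
INSERT INTO ref0 SELECT * FROM ref0 ORDER BY id LIMIT 1;   -- int(10 * 0.1) duplicate rows
```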
## Query Configuration
After generating the DDL and data insertion commands, the tool generates the queries.
Schema for Query configuration:
```yaml
queryCount: <int>
queryOutFile: <string>
semiAntiJoin: <bool>
cartesianProduct: <bool>
limit: <bool>
orderby: <bool>
forceOrderbyWithLimit: <bool>
useAvgAtTopLevelTarget: <bool>
dataRange:
from: <int>
to: <int>
filterRange:
from: <int>
to: <int>
limitRange:
from: <int>
to: <int>
targetRteCount: <int>
targetCteCount: <int>
targetCteRteCount: <int>
targetJoinTypes: <JoinType[]>
targetRteTypes: <RteType[]>
targetRestrictOps: <RestrictOp[]>
```
Explanation:
```yaml
queryCount: "number of queries to generate"
queryOutFile: "file to write generated queries"
semiAntiJoin: "should we support semi joins (WHERE col IN (Subquery))"
cartesianProduct: "should we support cartesian joins"
limit: "should we support limit clause"
orderby: "should we support order by clause"
forceOrderbyWithLimit: "should we force order by when we use limit"
useAvgAtTopLevelTarget: "should we make top level query as select avg() from (subquery)"
dataRange:
from: "starting boundary for data generation"
to: "end boundary for data generation"
filterRange:
from: "starting boundary for restriction clause"
to: "end boundary for restriction clause"
limitRange:
from: "starting boundary for limit clause"
to: "end boundary for data limit clause"
targetRteCount: "limits how many rtes should exist in non-cte part of the query"
targetCteCount: "limits how many ctes should exist in query"
targetCteRteCount: "limits how many rtes should exist in cte part of the query"
targetJoinTypes: "supported join types"
targetRteTypes: "supported rte types"
targetRestrictOps: "supported restrict ops"
```
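Putting it together, with `useAvgAtTopLevelTarget: true` a generated query looks roughly like the sketch below (illustrative only; the joins, aliases and constants are chosen randomly):

```sql
-- queryId: 1
SELECT count(*), avg(avgsub.id) FROM
(SELECT table_1.id FROM dist0 AS table_1
 INNER JOIN ref0 AS table_2 USING (id)
 WHERE table_2.id < 5 ORDER BY id LIMIT 3) AS avgsub;
```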
## Misc Configuration
The tool has some configuration options that do not fit into the 3 parts above.
Schema for misc configuration:
```yaml
interactiveMode: <bool>
```
Explanation:
```yaml
interactiveMode: "when true, interactively prints generated ddls and queries. Otherwise, it writes them to configured files."
```
## Supported Operations
The tool uses `commonColName` for any kind of target selection required by any supported query clause.
### Column Type Support
The tool currently supports only the int data type, but there are plans to support other basic types.
### Join Support
The tool supports the following join types:
```yaml
targetJoinTypes:
- INNER
- LEFT
- RIGHT
- FULL
```
### Citus Table Support
The tool supports the following Citus table types:
```yaml
targetTables:
- Table:
...
citusType: <one of (DISTRIBUTED || REFERENCE || POSTGRES)>
...
```
### Restrict Operation Support
The tool supports the following restrict operations:
```yaml
targetRestrictOps:
- LT
- GT
- EQ
```
### Rte Support
The tool supports the following RTE types:
```yaml
targetRteTypes:
- RELATION
- SUBQUERY
- CTE
- VALUES
```
## How to Generate Queries?
You have 2 different options.
- [Interactive Mode](#interactive-mode)
- [File Mode](#file-mode)
### Interactive Mode
In this mode, you will be prompted after each generated query. Press `Enter` to continue generating queries,
and press `x` to exit.
1. Configure `interactiveMode: true` in `config.yaml`,
2. Run the command shown below
```bash
cd src/test/regress/citus_tests/query_generator
python generate_queries.py
```
### File Mode
In this mode, the generated ddls and queries will be written into the files configured in [config.yaml](./config/config.yaml).
1. Configure `interactiveMode: false`,
2. Configure `queryCount: <total_query>`,
3. Configure `queryOutFile: <query_file_path>` and `ddlOutFile: <ddlfile_path>`
4. Run the command shown below
```bash
cd src/test/regress/citus_tests/query_generator
python generate_queries.py
```

View File

@ -0,0 +1,60 @@
#!/bin/bash
# make bash behave
set -euo pipefail
psql_user=$1
psql_db=$2
psql_port=$3
runDDLs()
{
# run ddls
psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" -f "${out_folder}"/ddls.sql > /dev/null
}
runUndistributeTables()
{
undistribute_all_tables_command='SELECT undistribute_table(logicalrelid) FROM pg_dist_partition;'
# run undistribute all tables
psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" -c "${undistribute_all_tables_command}" > /dev/null
}
runQueries()
{
out_filename=$1
# run dmls
# echo queries and comments for query tracing
psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" \
--echo-all \
-f "${out_folder}"/queries.sql > "${out_filename}" 2>&1
}
showDiffs()
{
pushd . && cd "${script_folder}" && python3 diff-checker.py && popd
}
# run query generator and let it create output ddls and queries
script_folder=$(dirname "$0")
out_folder="${script_folder}"/../out
pushd . && cd "${script_folder}"/.. && python3 generate_queries.py && popd
# remove result files if they exist
rm -rf "${out_folder}"/dist_queries.out "${out_folder}"/local_queries.out
# run ddls
runDDLs
# runs dmls for distributed tables
runQueries "${out_folder}"/dist_queries.out
# undistribute all dist tables
runUndistributeTables
# runs the same dmls for pg local tables
runQueries "${out_folder}"/local_queries.out
# see diffs in results
showDiffs

View File

@ -0,0 +1,122 @@
import os
import re
import subprocess
import sys
def createPatternForFailedQueryBlock(acceptableErrors):
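    # builds a pattern like "-- queryId: [0-9]+\n.*\npsql:.*(?:<error1>|<error2>)"
    # (illustrative), which matches a query block in the psql output that failed
    # with one of the acceptable errors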
    totalAcceptableErrors = len(acceptableErrors)
    failedQueryBlockPattern = "-- queryId: [0-9]+\n.*\npsql:.*(?:"
    for acceptableErrorIdx in range(totalAcceptableErrors):
        failedQueryBlockPattern += acceptableErrors[acceptableErrorIdx]
        if acceptableErrorIdx < totalAcceptableErrors - 1:
failedQueryBlockPattern += "|"
failedQueryBlockPattern += ")"
return failedQueryBlockPattern
def findFailedQueriesFromFile(queryOutFile, acceptableErrors):
failedQueryIds = []
outFileContent = ""
failedQueryBlockPattern = createPatternForFailedQueryBlock(acceptableErrors)
queryIdPattern = "queryId: ([0-9]+)"
with open(queryOutFile, "r") as f:
outFileContent = f.read()
failedQueryContents = re.findall(failedQueryBlockPattern, outFileContent)
failedQueryIds = [
re.search(queryIdPattern, failedQueryContent)[1]
for failedQueryContent in failedQueryContents
]
return failedQueryIds
def removeFailedQueryOutputFromFile(outFile, failedQueryIds):
if len(failedQueryIds) == 0:
return
distOutFileContentAsLines = []
with open(outFile, "r") as f:
distOutFileContentAsLines = f.readlines()
with open(outFile, "w") as f:
clear = False
nextIdx = 0
nextQueryIdToDelete = failedQueryIds[nextIdx]
queryIdPattern = "queryId: ([0-9]+)"
for line in distOutFileContentAsLines:
matched = re.search(queryIdPattern, line)
            # found a line which contains a query id
if matched:
# query id matches with the next failed query's id
if nextQueryIdToDelete == matched[1]:
                    # clear lines until we find a successful query
clear = True
nextIdx += 1
if nextIdx < len(failedQueryIds):
nextQueryIdToDelete = failedQueryIds[nextIdx]
else:
                    # we found a successful query
clear = False
if not clear:
f.write(line)
return
def removeFailedQueryOutputFromFiles(distQueryOutFile, localQueryOutFile):
    # remove the failed distributed queries from both the local and the distributed query output files to prevent diffs for acceptable errors
    # some of the generated queries fail with the errors below due to https://github.com/citusdata/citus/issues/6653; we skip those until we support them
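    # in the .out files, such a failed query block looks like (illustrative):
    #   -- queryId: 42
    #   SELECT ... ;
    #   psql:<...>/queries.sql:<line>: ERROR:  complex joins are only supported ...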
acceptableErrors = [
"ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns",
"ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns",
]
failedDistQueryIds = findFailedQueriesFromFile(distQueryOutFile, acceptableErrors)
removeFailedQueryOutputFromFile(distQueryOutFile, failedDistQueryIds)
removeFailedQueryOutputFromFile(localQueryOutFile, failedDistQueryIds)
return
def showDiffs(distQueryOutFile, localQueryOutFile, diffFile):
exitCode = 1
with open(diffFile, "w") as f:
diffCommand = "diff -u {} {}".format(localQueryOutFile, distQueryOutFile)
process = subprocess.Popen(diffCommand.split(), stdout=f, stderr=f, shell=False)
process.wait()
exitCode = process.returncode
print("diff exit {}".format(exitCode))
return exitCode
def exitIfAnyLocalQueryFailed(localQueryOutFile):
allErrors = ["ERROR:"]
failedLocalQueryIds = findFailedQueriesFromFile(localQueryOutFile, allErrors)
assert (
len(failedLocalQueryIds) == 0
), """There might be an internal error related to query generator or
we might find a Postgres bug. Check local_queries.out to see the error."""
return
if __name__ == "__main__":
scriptDirPath = os.path.dirname(os.path.abspath(__file__))
outFolderPath = scriptDirPath + "/../out"
localQueryOutFile = "{}/local_queries.out".format(outFolderPath)
distQueryOutFile = "{}/dist_queries.out".format(outFolderPath)
diffFile = "{}/local_dist.diffs".format(outFolderPath)
# exit if we have any error from local queries
exitIfAnyLocalQueryFailed(localQueryOutFile)
# find failed queries from distQueryOutFile and then remove failed queries and their results from
# both distQueryOutFile and localQueryOutFile
removeFailedQueryOutputFromFiles(distQueryOutFile, localQueryOutFile)
# show diffs in unified format
exitCode = showDiffs(distQueryOutFile, localQueryOutFile, diffFile)
sys.exit(exitCode)

View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""query_gen_test
Usage:
run_query_compare_test --bindir=<bindir> --pgxsdir=<pgxsdir>
Options:
--bindir=<bindir> PostgreSQL executable directory (ex: '~/.pgenv/pgsql-10.4/bin')
--pgxsdir=<pgxsdir> Path to the PGXS directory (ex: ~/.pgenv/src/postgresql-11.3)
"""
import os
import subprocess
import sys
from docopt import docopt
# https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time/14132912#14132912
sys.path.append(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
)
# ignore E402 because these imports require addition to path
import common # noqa: E402
import config as cfg # noqa: E402
def run_test(config):
# start cluster
common.initialize_temp_dir(cfg.CITUS_ARBITRARY_TEST_DIR)
common.initialize_citus_cluster(
config.bindir, config.datadir, config.settings, config
)
# run test
scriptDirPath = os.path.dirname(os.path.abspath(__file__))
testRunCommand = "bash {}/citus_compare_dist_local_joins.sh {} {} {}".format(
scriptDirPath, config.user, config.dbname, config.coordinator_port()
)
process = subprocess.Popen(
testRunCommand.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
stdout, stderr = process.communicate()
# stop cluster
common.stop_databases(
config.bindir, config.datadir, config.node_name_to_ports, config.name
)
print(stdout)
print(stderr)
print(process.returncode)
sys.exit(process.returncode)
if __name__ == "__main__":
citusClusterConfig = cfg.CitusSuperUserDefaultClusterConfig(
docopt(__doc__, version="run_query_compare_test")
)
run_test(citusClusterConfig)

View File

@ -0,0 +1,129 @@
import copy
import yaml
from config.config_parser import (
parseJoinTypeArray,
parseRange,
parseRestrictOpArray,
parseRteTypeArray,
parseTableArray,
)
from node_defs import CitusType
class Config:
def __init__(self):
configObj = Config.parseConfigFile("config/config.yaml")
self.targetTables = _distinctCopyTables(
parseTableArray(configObj["targetTables"])
)
self.targetJoinTypes = parseJoinTypeArray(configObj["targetJoinTypes"])
self.targetRteTypes = parseRteTypeArray(configObj["targetRteTypes"])
self.targetRestrictOps = parseRestrictOpArray(configObj["targetRestrictOps"])
self.commonColName = configObj["commonColName"]
self.targetRteCount = configObj["targetRteCount"]
self.targetCteCount = configObj["targetCteCount"]
self.targetCteRteCount = configObj["targetCteRteCount"]
self.semiAntiJoin = configObj["semiAntiJoin"]
self.cartesianProduct = configObj["cartesianProduct"]
self.limit = configObj["limit"]
self.orderby = configObj["orderby"]
self.forceOrderbyWithLimit = configObj["forceOrderbyWithLimit"]
self.useAvgAtTopLevelTarget = configObj["useAvgAtTopLevelTarget"]
self.interactiveMode = configObj["interactiveMode"]
self.queryOutFile = configObj["queryOutFile"]
self.ddlOutFile = configObj["ddlOutFile"]
self.queryCount = configObj["queryCount"]
self.dataRange = parseRange(configObj["dataRange"])
self.filterRange = parseRange(configObj["filterRange"])
self.limitRange = parseRange(configObj["limitRange"])
self._ensureRteLimitsAreSane()
# print(self)
def __repr__(self):
rep = "targetRteCount: {}\n".format(self.targetRteCount)
rep += "targetCteCount: {}\n".format(self.targetCteCount)
rep += "targetCteRteCount: {}\n".format(self.targetCteRteCount)
rep += "targetRteTypes:\n"
for rteType in self.targetRteTypes:
rep += "\t{}\n".format(rteType)
rep += "targetJoinTypes:\n"
for joinType in self.targetJoinTypes:
rep += "\t{}\n".format(joinType)
rep += "restrictOps:\n"
for restrictOp in self.targetRestrictOps:
rep += "\t{}\n".format(restrictOp)
return rep
def _ensureRteLimitsAreSane(self):
totalAllowedUseForAllTables = 0
for table in self.targetTables:
totalAllowedUseForAllTables += table.maxAllowedUseOnQuery
assert (
totalAllowedUseForAllTables >= self.targetRteCount
), """targetRteCount cannot be greater than sum of maxAllowedUseOnQuery for all tables.
Set targetRteCount to a lower value or increase maxAllowedUseOnQuery for tables at config.yml."""
@staticmethod
def parseConfigFile(path):
try:
with open(path, "r") as configFile:
return yaml.load(configFile, yaml.Loader)
except IOError as e:
print(f"I/O error({0}): {1}".format(e.errno, e.strerror))
raise BaseException("cannot parse config.yaml")
except Exception:
print("Unexpected error while parsing config.yml.")
_config = None
def resetConfig():
global _config
_config = Config()
def getConfig():
return _config
def getAllTableNames():
"""returns table names from target tables given at config"""
tables = getConfig().targetTables
tableNames = [table.name for table in tables]
return tableNames
def getMaxAllowedCountForTable(tableName):
tables = getConfig().targetTables
filtered = filter(lambda el: el.name == tableName, tables)
filtered = list(filtered)
assert len(filtered) == 1
return filtered[0].maxAllowedUseOnQuery
def isTableDistributed(table):
return table.citusType == CitusType.DISTRIBUTED
def isTableReference(table):
return table.citusType == CitusType.REFERENCE
def _distinctCopyTables(tables):
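    """copies each table distinctCopyCount times, distinguishing the copies by an
    index suffix appended to the name, e.g. a table named dist with
    distinctCopyCount: 2 yields dist0 and dist1"""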
distinctCopyTables = []
for table in tables:
distinctCopyCount = table.distinctCopyCount
for tblIdx in range(1, distinctCopyCount):
distinctCopyTable = copy.deepcopy(table)
distinctCopyTable.name += str(tblIdx)
distinctCopyTables.append(distinctCopyTable)
table.name += "0"
tables.extend(distinctCopyTables)
return tables

View File

@ -0,0 +1,65 @@
interactiveMode: false
queryCount: 250
queryOutFile: queries.sql
ddlOutFile: ddls.sql
semiAntiJoin: true
cartesianProduct: false
limit: true
orderby: true
forceOrderbyWithLimit: true
useAvgAtTopLevelTarget: true
dataRange:
from: 0
to: 10
filterRange:
from: 0
to: 10
limitRange:
from: 0
to: 10
targetRteCount: 40
targetCteCount: 3
targetCteRteCount: 2
commonColName: id
targetTables:
- Table:
name: dist
citusType: DISTRIBUTED
maxAllowedUseOnQuery: 10
rowCount: 10
nullRate: 0.1
duplicateRate: 0.1
columns:
- Column:
name: id
type: int
distinctCopyCount: 2
- Table:
name: ref
citusType: REFERENCE
maxAllowedUseOnQuery: 10
rowCount: 10
nullRate: 0.1
duplicateRate: 0.1
columns:
- Column:
name: id
type: int
distinctCopyCount: 2
targetJoinTypes:
- INNER
- LEFT
- RIGHT
- FULL
targetRteTypes:
- RELATION
- SUBQUERY
- CTE
targetRestrictOps:
- LT
- GT

View File

@ -0,0 +1,81 @@
from node_defs import CitusType, Column, JoinType, RestrictOp, RTEType, Table
def parseJoinType(joinTypeText):
return JoinType[joinTypeText]
def parseJoinTypeArray(joinTypeTexts):
joinTypes = []
for joinTypeText in joinTypeTexts:
joinType = parseJoinType(joinTypeText)
joinTypes.append(joinType)
return joinTypes
def parseRteType(rteTypeText):
return RTEType[rteTypeText]
def parseRteTypeArray(rteTypeTexts):
rteTypes = []
for rteTypeText in rteTypeTexts:
rteType = parseRteType(rteTypeText)
rteTypes.append(rteType)
return rteTypes
def parseRestrictOp(restrictOpText):
return RestrictOp[restrictOpText]
def parseRestrictOpArray(restrictOpTexts):
restrictOps = []
for restrictOpText in restrictOpTexts:
restrictOp = parseRestrictOp(restrictOpText)
restrictOps.append(restrictOp)
return restrictOps
def parseTable(targetTableDict):
name = targetTableDict["name"]
citusType = CitusType[targetTableDict["citusType"]]
maxAllowedUseOnQuery = targetTableDict["maxAllowedUseOnQuery"]
rowCount = targetTableDict["rowCount"]
nullRate = targetTableDict["nullRate"]
duplicateRate = targetTableDict["duplicateRate"]
columns = []
for columnDict in targetTableDict["columns"]:
col = parseColumn(columnDict)
columns.append(col)
distinctCopyCount = targetTableDict["distinctCopyCount"]
return Table(
name,
citusType,
maxAllowedUseOnQuery,
rowCount,
nullRate,
duplicateRate,
columns,
distinctCopyCount,
)
def parseTableArray(targetTableDicts):
tables = []
for targetTableDict in targetTableDicts:
table = parseTable(targetTableDict["Table"])
tables.append(table)
return tables
def parseColumn(targetColumnDict):
name = targetColumnDict["name"]
type = targetColumnDict["type"]
return Column(name, type)
def parseRange(rangeDict):
fromVal = rangeDict["from"]
toVal = rangeDict["to"]
return (fromVal, toVal)

View File

@ -0,0 +1,81 @@
from node_defs import CitusType
from config.config import getConfig
def getTableData():
dataGenerationSql = ""
tableIdx = 1
(fromVal, toVal) = getConfig().dataRange
tables = getConfig().targetTables
for table in tables:
# generate base rows
dataGenerationSql += _genOverlappingData(table.name, fromVal, table.rowCount)
dataGenerationSql += _genNonOverlappingData(table.name, toVal, tableIdx)
dataGenerationSql += "\n"
# generate null rows
        if table.citusType != CitusType.DISTRIBUTED:
targetNullRows = int(table.rowCount * table.nullRate)
dataGenerationSql += _genNullData(table.name, targetNullRows)
dataGenerationSql += "\n"
# generate duplicate rows
targetDuplicateRows = int(table.rowCount * table.duplicateRate)
dataGenerationSql += _genDupData(table.name, targetDuplicateRows)
dataGenerationSql += "\n\n"
tableIdx += 1
return dataGenerationSql
def _genOverlappingData(tableName, startVal, rowCount):
"""returns string to fill table with [startVal,startVal+rowCount] range of integers"""
dataGenerationSql = ""
dataGenerationSql += "INSERT INTO " + tableName
dataGenerationSql += (
" SELECT i FROM generate_series("
+ str(startVal)
+ ","
+ str(startVal + rowCount)
+ ") i;"
)
return dataGenerationSql
def _genNullData(tableName, nullCount):
"""returns string to fill table with NULLs"""
dataGenerationSql = ""
dataGenerationSql += "INSERT INTO " + tableName
dataGenerationSql += (
" SELECT NULL FROM generate_series(0," + str(nullCount) + ") i;"
)
return dataGenerationSql
def _genDupData(tableName, dupRowCount):
"""returns string to fill table with duplicate integers which are fetched from given table"""
dataGenerationSql = ""
dataGenerationSql += "INSERT INTO " + tableName
dataGenerationSql += (
" SELECT * FROM "
+ tableName
+ " ORDER BY "
+ getConfig().commonColName
+ " LIMIT "
+ str(dupRowCount)
+ ";"
)
return dataGenerationSql
def _genNonOverlappingData(tableName, startVal, tableIdx):
"""returns string to fill table with different integers for given table"""
startVal = startVal + tableIdx * 20
endVal = startVal + 20
dataGenerationSql = ""
dataGenerationSql += "INSERT INTO " + tableName
dataGenerationSql += (
" SELECT i FROM generate_series(" + str(startVal) + "," + str(endVal) + ") i;"
)
return dataGenerationSql

View File

@ -0,0 +1,48 @@
from config.config import getConfig, isTableDistributed, isTableReference
def getTableDDLs():
ddls = ""
tables = getConfig().targetTables
for table in tables:
ddls += _genTableDDL(table)
ddls += "\n"
return ddls
def _genTableDDL(table):
ddl = ""
ddl += "DROP TABLE IF EXISTS " + table.name + ";"
ddl += "\n"
ddl += "CREATE TABLE " + table.name + "("
for column in table.columns[:-1]:
ddl += _genColumnDDL(column)
ddl += ",\n"
if len(table.columns) > 0:
ddl += _genColumnDDL(table.columns[-1])
ddl += ");\n"
if isTableDistributed(table):
ddl += (
"SELECT create_distributed_table("
+ "'"
+ table.name
+ "','"
+ getConfig().commonColName
+ "'"
+ ");"
)
ddl += "\n"
elif isTableReference(table):
ddl += "SELECT create_reference_table(" + "'" + table.name + "'" + ");"
ddl += "\n"
return ddl
def _genColumnDDL(column):
ddl = ""
ddl += column.name
ddl += " "
ddl += column.type
return ddl

View File

@ -0,0 +1,65 @@
import signal
import sys
from data_gen import getTableData
from ddl_gen import getTableDDLs
from query_gen import newQuery
from config.config import getConfig, resetConfig
def _signal_handler(sig, frame):
sys.exit(0)
def _interactiveMode(ddls, data):
print(ddls)
print(data)
while True:
res = input("Press x to exit or Enter to generate more")
if res.lower() == "x":
print("Exit from query generation mode!")
sys.exit(0)
query = newQuery()
print(query)
resetConfig()
def _fileMode(ddls, data):
ddlFileName = "out/" + getConfig().ddlOutFile
with open(ddlFileName, "w") as ddlFile:
ddlFile.writelines([ddls, data])
queryCount = getConfig().queryCount
fileName = "out/" + getConfig().queryOutFile
with open(fileName, "w") as f:
# enable repartition joins due to https://github.com/citusdata/citus/issues/6865
enableRepartitionJoinCommand = "SET citus.enable_repartition_joins TO on;\n"
queryLines = [enableRepartitionJoinCommand]
queryId = 1
for _ in range(queryCount):
query = newQuery()
queryLine = "-- queryId: " + str(queryId) + "\n"
queryLine += query + "\n\n"
queryLines.append(queryLine)
queryId += 1
f.writelines(queryLines)
if __name__ == "__main__":
signal.signal(signal.SIGINT, _signal_handler)
resetConfig()
ddls = getTableDDLs()
data = getTableData()
if getConfig().interactiveMode:
_interactiveMode(ddls, data)
else:
_fileMode(ddls, data)

View File

@ -0,0 +1,55 @@
from enum import Enum
class JoinType(Enum):
INNER = 1
LEFT = 2
RIGHT = 3
FULL = 4
class RTEType(Enum):
RELATION = 1
SUBQUERY = 2
CTE = 3
VALUES = 4
class RestrictOp(Enum):
LT = 1
GT = 2
EQ = 3
class CitusType(Enum):
DISTRIBUTED = 1
REFERENCE = 2
POSTGRES = 3
class Table:
def __init__(
self,
name,
citusType,
maxAllowedUseOnQuery,
rowCount,
nullRate,
duplicateRate,
columns,
distinctCopyCount,
):
self.name = name
self.citusType = citusType
self.maxAllowedUseOnQuery = maxAllowedUseOnQuery
self.rowCount = rowCount
self.nullRate = nullRate
self.duplicateRate = duplicateRate
self.columns = columns
self.distinctCopyCount = distinctCopyCount
class Column:
def __init__(self, name, type):
self.name = name
self.type = type

View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -0,0 +1,434 @@
import random
from node_defs import RTEType
from random_selections import (
randomJoinOp,
randomRestrictOp,
randomRteType,
shouldSelectThatBranch,
)
from config.config import getAllTableNames, getConfig, getMaxAllowedCountForTable
# query_gen.py generates a new query from the grammar rules below. It randomly chooses allowed rules
# to generate a query. Here are some important notes about the query generation:
#
# - We enforce the max allowed # of uses for each table. We also enforce a global total # of tables.
# - Table names, restriction types and any other selections are chosen among the values provided by the
#   configuration file (config.yaml).
# - The entry point for the generator is newQuery; all other methods are internal. We pass a context
#   object named GeneratorContext to all internal methods as a parameter to perform checks and generations
#   on the query via the context.
# - shouldSelectThatBranch() is a useful utility method to randomly choose a grammar rule. Some of the rules
#   are only selected if they are allowed in the configuration file.
# - We enforce table limits separately if we are inside the cte part of the query (see targetCteRteCount).
#   We also enforce the max # of ctes for a query.
# - commonColName from the config is used in the select and where clauses.
# - useAvgAtTopLevelTarget is useful to return a single row as the query result. It is also useful to track
#   Citus query bugs via run_query_compare_test.py.
# - The '=' restriction is removed from the config by default so that queries return non-null values most of the time.
# - 'RTE.VALUES' is also removed from the config for the same reason.
# - The filter range is selected to be the same as the data range for the same reason.
# - aliasStack at GeneratorContext is useful to put the correct table names into the where clause.
# grammar syntax
#
# ======Assumptions======
# 1. Tables have a common dist col
# 2. All operations execute on the common column for all tables.
#
# TODO: RTE_FUNCTION, RTE_TABLEFUNC
#
# ====SYNTAX====
# ===Nonterminals===
# Query
# SelectExpr
# FromExpr
# RestrictExpr
# RteList
# Rte
# SubqueryRte
# RelationRte
# JoinList
# JoinOp
# Using
# RestrictList
# Restrict
# CteRte
# CteList
# Cte
# ValuesRte
# Limit
# OrderBy
#
# ===Terminals===
# e 'SELECT' 'FROM' 'INNER JOIN' 'LEFT JOIN' 'RIGHT JOIN' 'FULL JOIN' 'WHERE' 'LIMIT' 'USING' 'WITH'
# 'ORDER BY' 'VALUES' 'IN' 'NOT' 'AS' '<' '>' '=' '*' ',' ';' '(' ')'
#
# ===Rules===
# Start -> Query ';' || 'WITH' CteList Query ';'
# Query -> SelectExpr FromExpr [OrderBy] [Limit] || 'SELECT' 'avg(avgsub.DistColName)' 'FROM' SubqueryRte 'AS avgsub'
# SelectExpr -> 'SELECT' 'curAlias()' '.' DistColName
# FromExpr -> 'FROM' (Rte JoinList JoinOp Rte Using || RteList) ['WHERE' 'nextRandomAlias()' '.' DistColName RestrictExpr]
# RestrictExpr -> ('<' || '>' || '=') Int || ['NOT'] 'IN' SubqueryRte
# JoinList -> JoinOp Rte Using JoinList || e
# Using -> 'USING' '(' DistColName ')'
# RteList -> Rte [, RteList] || Rte
# Rte -> SubqueryRte 'AS' 'nextRandomAlias()' || RelationRte 'AS' 'nextRandomAlias()' ||
# CteRte 'AS' 'nextRandomAlias()' || ValuesRte 'AS' 'nextRandomAlias()'
# SubqueryRte -> '(' Query ')'
# RelationRte -> 'nextRandomTableName()'
# CteRte -> 'randomCteName()'
# CteList -> Cte [',' CteList] || Cte
# Cte -> 'nextRandomAlias()' 'AS' '(' Query ')'
# ValuesRte -> '(' 'VALUES' '(' 'random()' ')' ')'
# JoinOp -> 'INNER JOIN' || 'LEFT JOIN' || 'RIGHT JOIN' || 'FULL JOIN'
# Limit -> 'LIMIT' 'random()'
# OrderBy -> 'ORDER BY' DistColName
# DistColName -> 'hardwired(get from config)'
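#
# An example derivation (illustrative):
#   Start -> Query ';'
#   Query -> SelectExpr FromExpr
#   FromExpr -> 'FROM' Rte JoinOp Rte Using (with JoinList -> e)
#   yielding e.g.: SELECT table_2.id FROM dist0 AS table_1 INNER JOIN ref0 AS table_2 USING (id);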
class GeneratorContext:
"""context to store some variables which should be available during generation"""
def __init__(self):
# each level's last table is used in WHERE clause for the level
self.aliasStack = []
        # tracks if we are inside a cte, as we should not refer to a cte from inside a cte
self.insideCte = False
# total rtes in cte + non-cte parts
self.totalRteCount = 0
# rte count in non-cte part to enforce non-cte rte limit
self.currentRteCount = 0
# cte count to enforce cte limit
self.currentCteCount = 0
# rte count in cte part to enforce rte limit in cte
self.currentCteRteCount = 0
# rte counts per table to enforce rte limit per table
self.perTableRtes = {}
# tables which hit count limit
self.disallowedTables = set()
        # useful to ensure avg is used only at the top-level select
self.usedAvg = False
def randomCteName(self):
"""returns a randomly selected cte name"""
randCteRef = random.randint(0, self.currentCteCount - 1)
return " cte_" + str(randCteRef)
def curAlias(self):
"""returns current alias name to be used for the current table"""
return " table_" + str(self.totalRteCount)
def curCteAlias(self):
"""returns current alias name to be used for the current cte"""
return " cte_" + str(self.currentCteCount)
def hasAnyCte(self):
"""returns if context has any cte"""
return self.currentCteCount > 0
def canGenerateNewRte(self):
"""checks if context exceeds allowed rte count"""
return self.currentRteCount < getConfig().targetRteCount
def canGenerateNewCte(self):
"""checks if context exceeds allowed cte count"""
return self.currentCteCount < getConfig().targetCteCount
def canGenerateNewRteInsideCte(self):
"""checks if context exceeds allowed rte count inside a cte"""
return self.currentCteRteCount < getConfig().targetCteRteCount
def addAlias(self, alias):
"""adds given alias to the stack"""
self.aliasStack.append(alias)
def removeLastAlias(self):
"""removes the latest added alias from the stack"""
return self.aliasStack.pop()
def getRteNameEnforcingRteLimits(self):
"""returns rteName by enforcing rte count limits for tables"""
# do not enforce per table rte limit if we are inside cte
if self.insideCte:
rteName = random.choice(getAllTableNames())
return " " + rteName + " "
while True:
            # keep picking a random table, eliminating the ones that have hit their limit
allowedNames = set(getAllTableNames()) - self.disallowedTables
assert len(allowedNames) > 0
rteName = random.choice(list(allowedNames))
# not yet added to rte count map, so we can allow the name
if rteName not in self.perTableRtes:
self.perTableRtes[rteName] = 0
break
# limit is not exceeded, so we can allow the name
if self.perTableRtes[rteName] < getMaxAllowedCountForTable(rteName):
break
else:
self.disallowedTables.add(rteName)
# increment rte count for the table name
self.perTableRtes[rteName] += 1
return " " + rteName + " "
def newQuery():
"""returns generated query"""
genCtx = GeneratorContext()
return _start(genCtx)
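# Usage sketch (illustrative, not part of this module): each call to newQuery()
# is independent, so a test driver can simply loop, e.g.:
#
#   for _ in range(250):
#       print(newQuery())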
def _start(genCtx):
# Query ';' || 'WITH' CteList Query ';'
query = ""
if not genCtx.canGenerateNewCte() or shouldSelectThatBranch():
query += _genQuery(genCtx)
else:
genCtx.insideCte = True
query += " WITH "
query += _genCteList(genCtx)
genCtx.insideCte = False
query += _genQuery(genCtx)
query += ";"
return query
def _genQuery(genCtx):
    # SelectExpr FromExpr [OrderBy] [Limit] || 'SELECT' 'count(*), avg(avgsub.DistColName)' 'FROM' SubqueryRte 'AS avgsub'
query = ""
if (
getConfig().useAvgAtTopLevelTarget
and not genCtx.insideCte
and not genCtx.usedAvg
):
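        # wrap the top-level target list in count(*)/avg() so the query returns a
        # single aggregate row, keeping the comparison between local and
        # distributed results insensitive to row ordering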
genCtx.usedAvg = True
query += "SELECT "
query += "count(*), avg(avgsub." + getConfig().commonColName + ") FROM "
query += _genSubqueryRte(genCtx)
query += " AS avgsub"
else:
query += _genSelectExpr(genCtx)
query += _genFromExpr(genCtx)
choseOrderby = False
if getConfig().orderby and shouldSelectThatBranch():
query += _genOrderBy(genCtx)
choseOrderby = True
if getConfig().limit and shouldSelectThatBranch():
if not choseOrderby and getConfig().forceOrderbyWithLimit:
query += _genOrderBy(genCtx)
query += _genLimit(genCtx)
return query
def _genOrderBy(genCtx):
# 'ORDER BY' DistColName
query = ""
query += " ORDER BY "
query += getConfig().commonColName + " "
return query
def _genLimit(genCtx):
# 'LIMIT' 'random()'
query = ""
query += " LIMIT "
(fromVal, toVal) = getConfig().limitRange
query += str(random.randint(fromVal, toVal))
return query
def _genSelectExpr(genCtx):
# 'SELECT' 'curAlias()' '.' DistColName
query = ""
query += " SELECT "
commonColName = getConfig().commonColName
query += genCtx.curAlias() + "." + commonColName + " "
return query
def _genFromExpr(genCtx):
# 'FROM' (Rte JoinList JoinOp Rte Using || RteList) ['WHERE' 'nextRandomAlias()' '.' DistColName RestrictExpr]
query = ""
query += " FROM "
if shouldSelectThatBranch():
query += _genRte(genCtx)
query += _genJoinList(genCtx)
query += randomJoinOp()
query += _genRte(genCtx)
query += _genUsing(genCtx)
else:
query += _genRteList(genCtx)
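    # the alias popped here belongs to this level's last rte, so the WHERE
    # clause below references a table that is visible at this nesting level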
alias = genCtx.removeLastAlias()
if shouldSelectThatBranch():
query += " WHERE "
query += alias + "." + getConfig().commonColName
query += _genRestrictExpr(genCtx)
return query
def _genRestrictExpr(genCtx):
    # ('<' || '>' || '=') Int || ['NOT'] 'IN' SubqueryRte
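    # e.g. produces " < 7" or " NOT IN ( SELECT ... )" (values are illustrative)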
query = ""
if (
not getConfig().semiAntiJoin
or not genCtx.canGenerateNewRte()
or shouldSelectThatBranch()
):
query += randomRestrictOp()
(fromVal, toVal) = getConfig().filterRange
query += str(random.randint(fromVal, toVal))
else:
if shouldSelectThatBranch():
query += " NOT"
query += " IN "
query += _genSubqueryRte(genCtx)
return query
def _genCteList(genCtx):
# Cte [',' CteList] || Cte
query = ""
if shouldSelectThatBranch():
query += _genCte(genCtx)
if not genCtx.canGenerateNewCte():
return query
query += ","
query += _genCteList(genCtx)
else:
query += _genCte(genCtx)
return query
def _genCte(genCtx):
# 'nextRandomAlias()' 'AS' '(' Query ')'
query = ""
query += genCtx.curCteAlias()
genCtx.currentCteCount += 1
query += " AS "
query += " ( "
query += _genQuery(genCtx)
query += " ) "
return query
def _genRteList(genCtx):
# RteList -> Rte [, RteList] || Rte
query = ""
if getConfig().cartesianProduct and shouldSelectThatBranch():
query += _genRte(genCtx)
if not genCtx.canGenerateNewRte():
return query
if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
return query
query += ","
query += _genRteList(genCtx)
else:
query += _genRte(genCtx)
return query
def _genJoinList(genCtx):
# JoinOp Rte Using JoinList || e
query = ""
if shouldSelectThatBranch():
if not genCtx.canGenerateNewRte():
return query
if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
return query
query += randomJoinOp()
query += _genRte(genCtx)
query += _genUsing(genCtx)
query += _genJoinList(genCtx)
return query
def _genUsing(genCtx):
# 'USING' '(' DistColName ')'
query = ""
query += " USING (" + getConfig().commonColName + " ) "
return query
def _genRte(genCtx):
    # SubqueryRte 'AS' 'nextRandomAlias()' || RelationRte 'AS' 'nextRandomAlias()' ||
    # CteRte 'AS' 'nextRandomAlias()' || ValuesRte 'AS' 'nextRandomAlias()'
alias = genCtx.curAlias()
modifiedAlias = None
if genCtx.insideCte:
genCtx.currentCteRteCount += 1
else:
genCtx.currentRteCount += 1
genCtx.totalRteCount += 1
    # do not recurse into a subquery once we hit the RTE limit; fall back to a relation RTE
rteType = randomRteType()
if not genCtx.canGenerateNewRte():
rteType = RTEType.RELATION
    # do not recurse into a subquery once we hit the in-CTE RTE limit; fall back to a relation RTE
if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
rteType = RTEType.RELATION
    # we cannot reference a CTE if we are inside one, or if no CTE exists yet
if (genCtx.insideCte or not genCtx.hasAnyCte()) and rteType == RTEType.CTE:
rteType = RTEType.RELATION
query = ""
if rteType == RTEType.SUBQUERY:
query += _genSubqueryRte(genCtx)
elif rteType == RTEType.RELATION:
query += _genRelationRte(genCtx)
elif rteType == RTEType.CTE:
query += _genCteRte(genCtx)
elif rteType == RTEType.VALUES:
query += _genValuesRte(genCtx)
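        # VALUES provides no column names, so attach a column alias list using
        # the configured common column, e.g. "( VALUES(5) ) AS table_3(id)"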
modifiedAlias = alias + "(" + getConfig().commonColName + ") "
else:
raise BaseException("unknown RTE type")
query += " AS "
query += alias if not modifiedAlias else modifiedAlias
genCtx.addAlias(alias)
return query
def _genSubqueryRte(genCtx):
# '(' Query ')'
query = ""
query += " ( "
query += _genQuery(genCtx)
query += " ) "
return query
def _genRelationRte(genCtx):
# 'randomAllowedTableName()'
query = ""
query += genCtx.getRteNameEnforcingRteLimits()
return query
def _genCteRte(genCtx):
# 'randomCteName()'
query = ""
query += genCtx.randomCteName()
return query
def _genValuesRte(genCtx):
# '( VALUES(random()) )'
query = ""
(fromVal, toVal) = getConfig().dataRange
query += " ( VALUES(" + str(random.randint(fromVal, toVal)) + " ) ) "
return query

View File

@ -0,0 +1,39 @@
import random
from node_defs import RestrictOp
from config.config import getConfig
def shouldSelectThatBranch():
"""returns 0 or 1 randomly"""
return random.randint(0, 1)
def randomRteType():
"""returns a randomly selected RteType given at config"""
rtes = getConfig().targetRteTypes
return random.choice(rtes)
def randomJoinOp():
"""returns a randomly selected JoinOp given at config"""
joinTypes = getConfig().targetJoinTypes
return " " + random.choice(joinTypes).name + " JOIN"
def randomRestrictOp():
"""returns a randomly selected RestrictOp given at config"""
restrictOps = getConfig().targetRestrictOps
restrictOp = random.choice(restrictOps)
opText = ""
if restrictOp == RestrictOp.LT:
opText = "<"
elif restrictOp == RestrictOp.GT:
opText = ">"
elif restrictOp == RestrictOp.EQ:
opText = "="
else:
raise BaseException("Unknown restrict op")
return " " + opText + " "