mirror of https://github.com/citusdata/citus.git
Query generator test tool (#6686)
- The query generator is used to create queries allowed by the grammar, which is documented at `query_generator/query_gen.py` (currently it contains only joins). - This PR adds a CI test which uses the query generator to compare the results of generated queries that are executed on Citus tables and on local (undistributed) tables. It fails if there is an unexpected error in the results. The error can be related to Citus, the query generator, or even Postgres. - The tool is configured by the file `query_generator/config/config.yaml`, which limits the number of tables in generated queries and sets many table-related parameters (e.g. row count). - The run time of the CI task can be configured from the config file. By default, we run 250 queries with a maximum table count of 40 inside each query.
pull/6829/head
parent
08e2820c67
commit
a6a7271e63
|
@ -6,7 +6,7 @@ orbs:
|
|||
parameters:
|
||||
image_suffix:
|
||||
type: string
|
||||
default: '-v3417e8d'
|
||||
default: '-vabeb997'
|
||||
pg13_version:
|
||||
type: string
|
||||
default: '13.10'
|
||||
|
@ -421,6 +421,63 @@ jobs:
|
|||
- coverage:
|
||||
flags: 'test_<< parameters.pg_major >>,upgrade'
|
||||
|
||||
test-query-generator:
|
||||
description: Expects that the generated queries that are run on distributed and local tables would have the same results
|
||||
parameters:
|
||||
pg_major:
|
||||
description: 'postgres major version'
|
||||
type: integer
|
||||
image:
|
||||
description: 'docker image to use as for the tests'
|
||||
type: string
|
||||
default: citus/failtester
|
||||
image_tag:
|
||||
description: 'docker image tag to use'
|
||||
type: string
|
||||
docker:
|
||||
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
|
||||
working_directory: /home/circleci/project
|
||||
steps:
|
||||
- checkout
|
||||
- attach_workspace:
|
||||
at: .
|
||||
- install_extension:
|
||||
pg_major: << parameters.pg_major >>
|
||||
- configure
|
||||
- enable_core
|
||||
- run:
|
||||
name: 'Run Test'
|
||||
command: |
|
||||
gosu circleci make -C src/test/regress check-query-generator
|
||||
no_output_timeout: 5m
|
||||
- run:
|
||||
name: 'Show regressions'
|
||||
command: |
|
||||
find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" -exec cat {} +
|
||||
lines=$(find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" | wc -l)
|
||||
if [ $lines -ne 0 ]; then
|
||||
exit 1
|
||||
fi
|
||||
when: on_fail
|
||||
- run:
|
||||
name: 'Copy logfiles'
|
||||
command: |
|
||||
mkdir src/test/regress/tmp_citus_test/logfiles
|
||||
find src/test/regress/tmp_citus_test/ -name "logfile_*" -exec cp -t src/test/regress/tmp_citus_test/logfiles/ {} +
|
||||
when: on_fail
|
||||
- store_artifacts:
|
||||
name: 'Save logfiles'
|
||||
path: src/test/regress/tmp_citus_test/logfiles
|
||||
- store_artifacts:
|
||||
name: 'Save ddls'
|
||||
path: src/test/regress/citus_tests/query_generator/out/ddls.sql
|
||||
- store_artifacts:
|
||||
name: 'Save dmls'
|
||||
path: src/test/regress/citus_tests/query_generator/out/queries.sql
|
||||
- stack_trace
|
||||
- coverage:
|
||||
flags: 'test_<< parameters.pg_major >>,querygen'
|
||||
|
||||
test-citus:
|
||||
description: Runs the common tests of citus
|
||||
parameters:
|
||||
|
@ -935,6 +992,24 @@ workflows:
|
|||
image_tag: '<< pipeline.parameters.pg15_version >>'
|
||||
requires: [build-15]
|
||||
|
||||
- test-query-generator:
|
||||
name: 'test-13_check-query-generator'
|
||||
pg_major: 13
|
||||
image_tag: '<< pipeline.parameters.pg13_version >>'
|
||||
requires: [build-13]
|
||||
|
||||
- test-query-generator:
|
||||
name: 'test-14_check-query-generator'
|
||||
pg_major: 14
|
||||
image_tag: '<< pipeline.parameters.pg14_version >>'
|
||||
requires: [build-14]
|
||||
|
||||
- test-query-generator:
|
||||
name: 'test-15_check-query-generator'
|
||||
pg_major: 15
|
||||
image_tag: '<< pipeline.parameters.pg15_version >>'
|
||||
requires: [build-15]
|
||||
|
||||
- test-pg-upgrade:
|
||||
name: 'test-13-14_check-pg-upgrade'
|
||||
old_pg_major: 13
|
||||
|
@ -975,6 +1050,7 @@ workflows:
|
|||
- test-13_check-enterprise-failure
|
||||
- test-13_check-split
|
||||
- test-13_check-arbitrary-configs
|
||||
- test-13_check-query-generator
|
||||
- test-14_check-multi
|
||||
- test-14_check-multi-1
|
||||
- test-14_check-mx
|
||||
|
@ -993,6 +1069,7 @@ workflows:
|
|||
- test-14_check-enterprise-failure
|
||||
- test-14_check-split
|
||||
- test-14_check-arbitrary-configs
|
||||
- test-14_check-query-generator
|
||||
- test-15_check-multi
|
||||
- test-15_check-multi-1
|
||||
- test-15_check-mx
|
||||
|
@ -1011,6 +1088,7 @@ workflows:
|
|||
- test-15_check-enterprise-failure
|
||||
- test-15_check-split
|
||||
- test-15_check-arbitrary-configs
|
||||
- test-15_check-query-generator
|
||||
- test-13-14_check-pg-upgrade
|
||||
- test-14-15_check-pg-upgrade
|
||||
- test-13_check-citus-upgrade
|
||||
|
|
|
@ -34,6 +34,7 @@ MULTI_REGRESS_OPTS = --inputdir=$(citus_abs_srcdir) $(pg_regress_locale_flags) -
|
|||
pg_upgrade_check = $(citus_abs_srcdir)/citus_tests/upgrade/pg_upgrade_test.py
|
||||
citus_upgrade_check =CITUS_OLD_VERSION=$(citus-old-version) $(citus_abs_srcdir)/citus_tests/upgrade/citus_upgrade_test.py
|
||||
arbitrary_config_check = $(citus_abs_srcdir)/citus_tests/arbitrary_configs/citus_arbitrary_configs.py
|
||||
query_generator_check = $(citus_abs_srcdir)/citus_tests/query_generator/bin/run_query_compare_test.py
|
||||
|
||||
template_isolation_files = $(shell find $(citus_abs_srcdir)/spec/ -name '*.spec')
|
||||
generated_isolation_files = $(patsubst $(citus_abs_srcdir)/spec/%,$(citus_abs_srcdir)/build/specs/%,$(template_isolation_files))
|
||||
|
@ -44,7 +45,7 @@ vanilla_diffs_file = $(citus_abs_srcdir)/pg_vanilla_outputs/$(MAJORVERSION)/regr
|
|||
# intermediate, for muscle memory backward compatibility.
|
||||
check: check-full check-enterprise-full
|
||||
# check-full triggers all tests that ought to be run routinely
|
||||
check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest
|
||||
check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest check-query-generator
|
||||
# check-enterprise-full triggers all enterprise specific tests
|
||||
check-enterprise-full: check-enterprise check-enterprise-isolation check-enterprise-failure check-enterprise-isolation-logicalrep-1 check-enterprise-isolation-logicalrep-2 check-enterprise-isolation-logicalrep-3
|
||||
|
||||
|
@ -262,6 +263,9 @@ check-arbitrary-base: all
|
|||
check-pytest:
|
||||
pytest -n auto
|
||||
|
||||
check-query-generator: all
|
||||
${query_generator_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir)
|
||||
|
||||
check-citus-upgrade: all
|
||||
$(citus_upgrade_check) \
|
||||
--bindir=$(bindir) \
|
||||
|
|
|
@ -15,6 +15,7 @@ pytest-asyncio = "*"
|
|||
pytest-timeout = "*"
|
||||
pytest-xdist = "*"
|
||||
pytest-repeat = "*"
|
||||
pyyaml = "*"
|
||||
|
||||
[dev-packages]
|
||||
black = "*"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "eb9ca3a7b05e76c7ac60179a1755f89600dfb215e02bf08c258d548df1d96025"
|
||||
"sha256": "9568b1f3e4d4fd408e5e263f6346b0a4f479ac88e02f64bb79a9d482096e6a03"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -24,14 +24,6 @@
|
|||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.4.1"
|
||||
},
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836",
|
||||
"sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==22.2.0"
|
||||
},
|
||||
"blinker": {
|
||||
"hashes": [
|
||||
"sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6"
|
||||
|
@ -219,32 +211,28 @@
|
|||
},
|
||||
"cryptography": {
|
||||
"hashes": [
|
||||
"sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1",
|
||||
"sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7",
|
||||
"sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06",
|
||||
"sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84",
|
||||
"sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915",
|
||||
"sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074",
|
||||
"sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5",
|
||||
"sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3",
|
||||
"sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9",
|
||||
"sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3",
|
||||
"sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011",
|
||||
"sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536",
|
||||
"sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a",
|
||||
"sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f",
|
||||
"sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480",
|
||||
"sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac",
|
||||
"sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0",
|
||||
"sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108",
|
||||
"sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828",
|
||||
"sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354",
|
||||
"sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612",
|
||||
"sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3",
|
||||
"sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97"
|
||||
"sha256:05dc219433b14046c476f6f09d7636b92a1c3e5808b9a6536adf4932b3b2c440",
|
||||
"sha256:0dcca15d3a19a66e63662dc8d30f8036b07be851a8680eda92d079868f106288",
|
||||
"sha256:142bae539ef28a1c76794cca7f49729e7c54423f615cfd9b0b1fa90ebe53244b",
|
||||
"sha256:3daf9b114213f8ba460b829a02896789751626a2a4e7a43a28ee77c04b5e4958",
|
||||
"sha256:48f388d0d153350f378c7f7b41497a54ff1513c816bcbbcafe5b829e59b9ce5b",
|
||||
"sha256:4df2af28d7bedc84fe45bd49bc35d710aede676e2a4cb7fc6d103a2adc8afe4d",
|
||||
"sha256:4f01c9863da784558165f5d4d916093737a75203a5c5286fde60e503e4276c7a",
|
||||
"sha256:7a38250f433cd41df7fcb763caa3ee9362777fdb4dc642b9a349721d2bf47404",
|
||||
"sha256:8f79b5ff5ad9d3218afb1e7e20ea74da5f76943ee5edb7f76e56ec5161ec782b",
|
||||
"sha256:956ba8701b4ffe91ba59665ed170a2ebbdc6fc0e40de5f6059195d9f2b33ca0e",
|
||||
"sha256:a04386fb7bc85fab9cd51b6308633a3c271e3d0d3eae917eebab2fac6219b6d2",
|
||||
"sha256:a95f4802d49faa6a674242e25bfeea6fc2acd915b5e5e29ac90a32b1139cae1c",
|
||||
"sha256:adc0d980fd2760c9e5de537c28935cc32b9353baaf28e0814df417619c6c8c3b",
|
||||
"sha256:aecbb1592b0188e030cb01f82d12556cf72e218280f621deed7d806afd2113f9",
|
||||
"sha256:b12794f01d4cacfbd3177b9042198f3af1c856eedd0a98f10f141385c809a14b",
|
||||
"sha256:c0764e72b36a3dc065c155e5b22f93df465da9c39af65516fe04ed3c68c92636",
|
||||
"sha256:c33c0d32b8594fa647d2e01dbccc303478e16fdd7cf98652d5b3ed11aa5e5c99",
|
||||
"sha256:cbaba590180cba88cb99a5f76f90808a624f18b169b90a4abb40c1fd8c19420e",
|
||||
"sha256:d5a1bd0e9e2031465761dfa920c16b0065ad77321d8a8c1f5ee331021fda65e9"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==39.0.2"
|
||||
"version": "==40.0.2"
|
||||
},
|
||||
"docopt": {
|
||||
"hashes": [
|
||||
|
@ -271,11 +259,11 @@
|
|||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
"sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce",
|
||||
"sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182"
|
||||
"sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9",
|
||||
"sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.10.0"
|
||||
"version": "==3.12.0"
|
||||
},
|
||||
"flask": {
|
||||
"hashes": [
|
||||
|
@ -488,11 +476,11 @@
|
|||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2",
|
||||
"sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"
|
||||
"sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
|
||||
"sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==23.0"
|
||||
"version": "==23.1"
|
||||
},
|
||||
"passlib": {
|
||||
"hashes": [
|
||||
|
@ -553,21 +541,11 @@
|
|||
},
|
||||
"pyasn1": {
|
||||
"hashes": [
|
||||
"sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359",
|
||||
"sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576",
|
||||
"sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf",
|
||||
"sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7",
|
||||
"sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d",
|
||||
"sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00",
|
||||
"sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8",
|
||||
"sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86",
|
||||
"sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12",
|
||||
"sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776",
|
||||
"sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba",
|
||||
"sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2",
|
||||
"sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"
|
||||
"sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57",
|
||||
"sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"
|
||||
],
|
||||
"version": "==0.4.8"
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
|
||||
"version": "==0.5.0"
|
||||
},
|
||||
"pycparser": {
|
||||
"hashes": [
|
||||
|
@ -578,11 +556,11 @@
|
|||
},
|
||||
"pyopenssl": {
|
||||
"hashes": [
|
||||
"sha256:c1cc5f86bcacefc84dada7d31175cae1b1518d5f60d3d0bb595a67822a868a6f",
|
||||
"sha256:df5fc28af899e74e19fccb5510df423581047e10ab6f1f4ba1763ff5fde844c0"
|
||||
"sha256:841498b9bec61623b1b6c47ebbc02367c07d60e0e195f19790817f10cc8db0b7",
|
||||
"sha256:9e0c526404a210df9d2b18cd33364beadb0dc858a739b885677bc65e105d4a4c"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==23.0.0"
|
||||
"version": "==23.1.1"
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
|
@ -600,19 +578,19 @@
|
|||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e",
|
||||
"sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"
|
||||
"sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362",
|
||||
"sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==7.2.2"
|
||||
"version": "==7.3.1"
|
||||
},
|
||||
"pytest-asyncio": {
|
||||
"hashes": [
|
||||
"sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36",
|
||||
"sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442"
|
||||
"sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b",
|
||||
"sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.20.3"
|
||||
"version": "==0.21.0"
|
||||
},
|
||||
"pytest-repeat": {
|
||||
"hashes": [
|
||||
|
@ -638,6 +616,52 @@
|
|||
"index": "pypi",
|
||||
"version": "==3.2.1"
|
||||
},
|
||||
"pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
|
||||
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
|
||||
"sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
|
||||
"sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
|
||||
"sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b",
|
||||
"sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4",
|
||||
"sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07",
|
||||
"sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba",
|
||||
"sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9",
|
||||
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
|
||||
"sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
|
||||
"sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
|
||||
"sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
|
||||
"sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
|
||||
"sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
|
||||
"sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
|
||||
"sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
|
||||
"sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
|
||||
"sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
|
||||
"sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
|
||||
"sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
|
||||
"sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
|
||||
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
|
||||
"sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
|
||||
"sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
|
||||
"sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
|
||||
"sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
|
||||
"sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
|
||||
"sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
|
||||
"sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
|
||||
"sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
|
||||
"sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
|
||||
"sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
|
||||
"sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
|
||||
"sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
|
||||
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
|
||||
"sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
|
||||
"sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
|
||||
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
|
||||
"sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==6.0"
|
||||
},
|
||||
"ruamel.yaml": {
|
||||
"hashes": [
|
||||
"sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33",
|
||||
|
@ -705,20 +729,20 @@
|
|||
},
|
||||
"tornado": {
|
||||
"hashes": [
|
||||
"sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca",
|
||||
"sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72",
|
||||
"sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23",
|
||||
"sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8",
|
||||
"sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b",
|
||||
"sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9",
|
||||
"sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13",
|
||||
"sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75",
|
||||
"sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac",
|
||||
"sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e",
|
||||
"sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b"
|
||||
"sha256:4546003dc8b5733489139d3bff5fa6a0211be505faf819bd9970e7c2b32e8122",
|
||||
"sha256:4d349846931557b7ec92f224b5d598b160e2ba26ae1812480b42e9622c884bf7",
|
||||
"sha256:6164571f5b9f73143d1334df4584cb9ac86d20c461e17b6c189a19ead8bb93c1",
|
||||
"sha256:6cfff1e9c15c79e106b8352269d201f8fc0815914a6260f3893ca18b724ea94b",
|
||||
"sha256:720f53e6367b38190ae7fa398c25c086c69d88b3c6535bd6021a126b727fb5cd",
|
||||
"sha256:912df5712024564e362ecce43c8d5862e14c78c8dd3846c9d889d44fbd7f4951",
|
||||
"sha256:c37b6a384d54ce6a31168d40ab21ad2591ddaf34973075cc0cad154402ecd9e8",
|
||||
"sha256:c659ab04d5aa477dbe44152c67d93f3ad3243b992d94f795ca1d5c73c37337ce",
|
||||
"sha256:c9114a61a4588c09065b9996ae05462350d17160b92b9bf9a1e93689cc0424dc",
|
||||
"sha256:d68f3192936ff2c4add04dc21a436a43b4408d466746b78bb2b9d0a53a18683f",
|
||||
"sha256:d7b737e18f701de3e4a3b0824260b4d740e4d60607b8089bb80e80ffd464780e"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==6.2"
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==6.3"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
|
@ -808,42 +832,42 @@
|
|||
"develop": {
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836",
|
||||
"sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"
|
||||
"sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04",
|
||||
"sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==22.2.0"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==23.1.0"
|
||||
},
|
||||
"black": {
|
||||
"hashes": [
|
||||
"sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd",
|
||||
"sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555",
|
||||
"sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481",
|
||||
"sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468",
|
||||
"sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9",
|
||||
"sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a",
|
||||
"sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958",
|
||||
"sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580",
|
||||
"sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26",
|
||||
"sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32",
|
||||
"sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8",
|
||||
"sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753",
|
||||
"sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b",
|
||||
"sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074",
|
||||
"sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651",
|
||||
"sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24",
|
||||
"sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6",
|
||||
"sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad",
|
||||
"sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac",
|
||||
"sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221",
|
||||
"sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06",
|
||||
"sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27",
|
||||
"sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648",
|
||||
"sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739",
|
||||
"sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104"
|
||||
"sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5",
|
||||
"sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915",
|
||||
"sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326",
|
||||
"sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940",
|
||||
"sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b",
|
||||
"sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30",
|
||||
"sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c",
|
||||
"sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c",
|
||||
"sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab",
|
||||
"sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27",
|
||||
"sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2",
|
||||
"sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961",
|
||||
"sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9",
|
||||
"sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb",
|
||||
"sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70",
|
||||
"sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331",
|
||||
"sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2",
|
||||
"sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266",
|
||||
"sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d",
|
||||
"sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6",
|
||||
"sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b",
|
||||
"sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925",
|
||||
"sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8",
|
||||
"sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4",
|
||||
"sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==23.1.0"
|
||||
"version": "==23.3.0"
|
||||
},
|
||||
"click": {
|
||||
"hashes": [
|
||||
|
@ -863,11 +887,11 @@
|
|||
},
|
||||
"flake8-bugbear": {
|
||||
"hashes": [
|
||||
"sha256:beb5c7efcd7ccc2039ef66a77bb8db925e7be3531ff1cb4d0b7030d0e2113d72",
|
||||
"sha256:e3e7f74c8a49ad3794a7183353026dabd68c74030d5f46571f84c1fb0eb79363"
|
||||
"sha256:8a218d13abd6904800970381057ce8e72cde8eea743240c4ef3ede4dd0bc9cfb",
|
||||
"sha256:ea565bdb87b96b56dc499edd6cc3ba7f695373d902a5f56c989b74fad7c7719d"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==23.3.12"
|
||||
"version": "==23.3.23"
|
||||
},
|
||||
"isort": {
|
||||
"hashes": [
|
||||
|
@ -895,11 +919,11 @@
|
|||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2",
|
||||
"sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"
|
||||
"sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
|
||||
"sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==23.0"
|
||||
"version": "==23.1"
|
||||
},
|
||||
"pathspec": {
|
||||
"hashes": [
|
||||
|
@ -911,11 +935,11 @@
|
|||
},
|
||||
"platformdirs": {
|
||||
"hashes": [
|
||||
"sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa",
|
||||
"sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8"
|
||||
"sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08",
|
||||
"sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.1.1"
|
||||
"version": "==3.2.0"
|
||||
},
|
||||
"pycodestyle": {
|
||||
"hashes": [
|
||||
|
|
|
@ -15,6 +15,8 @@ WORKER2 = "worker2"
|
|||
REGULAR_USER_NAME = "regularuser"
|
||||
SUPER_USER_NAME = "postgres"
|
||||
|
||||
DATABASE_NAME = "postgres"
|
||||
|
||||
ARBITRARY_SCHEDULE_NAMES = [
|
||||
"create_schedule",
|
||||
"sql_schedule",
|
||||
|
@ -96,6 +98,7 @@ class CitusBaseClusterConfig(object, metaclass=NewInitCaller):
|
|||
self.temp_dir = CITUS_ARBITRARY_TEST_DIR
|
||||
self.worker_amount = 2
|
||||
self.user = REGULAR_USER_NAME
|
||||
self.dbname = DATABASE_NAME
|
||||
self.is_mx = True
|
||||
self.is_citus = True
|
||||
self.name = type(self).__name__
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
__pycache__
|
||||
*-env
|
|
@ -0,0 +1,223 @@
|
|||
## Goal of the Tool
|
||||
Tool supports a simple syntax to be useful to generate queries with different join orders. Main motivation for me to create the tool was to compare results of the generated queries for different [Citus](https://github.com/citusdata/citus) tables and Postgres tables. That is why we support a basic syntax for now. It can be extended to support different queries.
|
||||
|
||||
## Query Generator for Postgres
|
||||
Tool generates SELECT queries, whose depth can be configured, with different join orders. It also generates DDLs required for query execution.
|
||||
You can also tweak configuration parameters for data inserting command generation.
|
||||
|
||||
## How to Run Citus Join Verification?
|
||||
You can verify if Citus breaks any default PG join behaviour via `citus_compare_dist_local_joins.sh`. It creates
|
||||
tables specified in config. Then, it runs generated queries on those tables and saves the results into `out/dist_queries.out`.
|
||||
After running those queries for Citus tables, it creates PG tables with the same names as previous run, executes the same
|
||||
queries, and saves the results into `out/local_queries.out`. In the final step, it generates a diff between the local and distributed results.
|
||||
You can see the contents of `out/local_dist_diffs` to see if there is any Citus unsupported query.
|
||||
|
||||
1. Create a Citus local cluster with 2 workers by using [citus_dev](https://github.com/citusdata/tools/tree/develop/citus_dev) tool
|
||||
(Note: make sure you do not configure psql via .psqlrc file as it would fail the test.)
|
||||
```bash
|
||||
citus_dev make testCluster --destroy
|
||||
```
|
||||
2. Run the test,
|
||||
```bash
|
||||
cd src/test/regress/citus_tests/query_generator/bin
|
||||
bash citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port>
|
||||
```
|
||||
3. See the diff content in `src/test/regress/citus_tests/query_generator/out/local_dist_diffs`
|
||||
|
||||
### Configuration
|
||||
You can configure 3 different parts:
|
||||
|
||||
- [DDL Configuration](#ddl-configuration)
|
||||
- [Data Insertion Configuration](#data-insertion-configuration)
|
||||
- [Query Configuration](#query-configuration)
|
||||
|
||||
## DDL Configuration
|
||||
Tool generates related ddl commands before generating queries.
|
||||
|
||||
Schema for DDL configuration:
|
||||
```yaml
|
||||
ddlOutFile: <string>
|
||||
commonColName: <string>
|
||||
targetTables: <Table[]>
|
||||
- Table:
|
||||
name: <string>
|
||||
citusType: <CitusType>
|
||||
maxAllowedUseOnQuery: <int>
|
||||
rowCount: <int>
|
||||
nullRate: <float>
|
||||
duplicateRate: <float>
|
||||
columns: <Column[]>
|
||||
- Column:
|
||||
name: <string>
|
||||
type: <string>
|
||||
distinctCopyCount: <int>
|
||||
```
|
||||
|
||||
Explanation:
|
||||
```yaml
|
||||
ddlOutFile: "file to write generated DDL commands"
|
||||
commonColName: "name of the column that will be used as distribution column, filter column in restrictions and target column in selections"
|
||||
targetTables: "array of tables that will be used in generated queries"
|
||||
- Table:
|
||||
name: "name prefix of table"
|
||||
citusType: "citus type of table"
|
||||
maxAllowedUseOnQuery: "limits how many times table can appear in query"
|
||||
rowCount: "total # of rows that will be inserted into table"
|
||||
nullRate: "percentage of null rows in rowCount that will be inserted into table"
|
||||
duplicateRate: "percentage of duplicates in rowCount that will be inserted into table"
|
||||
columns: "array of columns in table"
|
||||
- Column:
|
||||
name: "name of column"
|
||||
type: "name of data type of column(only support 'int' now)"
|
||||
distinctCopyCount: "how many tables with the same configuration we should create(only by changing full name, still using the same name prefix)"
|
||||
```
|
||||
|
||||
|
||||
## Data Insertion Configuration
|
||||
Tool generates data insertion commands if you want tables with filled data. You can configure total number of rows, what percentage of them should
|
||||
be null and what percentage of them should be duplicated. For related configuration see Table schema at [DDL Configuration](#ddl-configuration). You
|
||||
can also configure range of the randomly generated data. See `dataRange` at [Query Configuration](#query-configuration).
|
||||
|
||||
## Query Configuration
|
||||
After generation of ddls and data insertion commands, the tool generates queries.
|
||||
|
||||
Schema for Query configuration:
|
||||
```yaml
|
||||
queryCount: <int>
|
||||
queryOutFile: <string>
|
||||
semiAntiJoin: <bool>
|
||||
cartesianProduct: <bool>
|
||||
limit: <bool>
|
||||
orderby: <bool>
|
||||
forceOrderbyWithLimit: <bool>
|
||||
useAvgAtTopLevelTarget: <bool>
|
||||
dataRange:
|
||||
from: <int>
|
||||
to: <int>
|
||||
filterRange:
|
||||
from: <int>
|
||||
to: <int>
|
||||
limitRange:
|
||||
from: <int>
|
||||
to: <int>
|
||||
targetRteCount: <int>
|
||||
targetCteCount: <int>
|
||||
targetCteRteCount: <int>
|
||||
targetJoinTypes: <JoinType[]>
|
||||
targetRteTypes: <RteType[]>
|
||||
targetRestrictOps: <RestrictOp[]>
|
||||
```
|
||||
|
||||
Explanation:
|
||||
```yaml
|
||||
queryCount: "number of queries to generate"
|
||||
queryOutFile: "file to write generated queries"
|
||||
semiAntiJoin: "should we support semi joins (WHERE col IN (Subquery))"
|
||||
cartesianProduct: "should we support cartesian joins"
|
||||
limit: "should we support limit clause"
|
||||
orderby: "should we support order by clause"
|
||||
forceOrderbyWithLimit: "should we force order by when we use limit"
|
||||
useAvgAtTopLevelTarget: "should we make top level query as select avg() from (subquery)"
|
||||
dataRange:
|
||||
from: "starting boundary for data generation"
|
||||
to: "end boundary for data generation"
|
||||
filterRange:
|
||||
from: "starting boundary for restriction clause"
|
||||
to: "end boundary for restriction clause"
|
||||
limitRange:
|
||||
from: "starting boundary for limit clause"
|
||||
to: "end boundary for data limit clause"
|
||||
targetRteCount: "limits how many rtes should exist in non-cte part of the query"
|
||||
targetCteCount: "limits how many ctes should exist in query"
|
||||
targetCteRteCount: "limits how many rtes should exist in cte part of the query"
|
||||
targetJoinTypes: "supported join types"
|
||||
targetRteTypes: "supported rte types"
|
||||
targetRestrictOps: "supported restrict ops"
|
||||
```
|
||||
|
||||
## Misc Configuration
|
||||
The tool has some configuration options which do not fit into the 3 parts above.
|
||||
|
||||
Schema for misc configuration:
|
||||
```yaml
|
||||
interactiveMode: <bool>
|
||||
```
|
||||
|
||||
Explanation:
|
||||
```yaml
|
||||
interactiveMode: "when true, interactively prints generated ddls and queries. Otherwise, it writes them to configured files."
|
||||
```
|
||||
|
||||
## Supported Operations
|
||||
It uses `commonColName` for any kind of target selection required for any supported query clause.
|
||||
|
||||
### Column Type Support
|
||||
Tool currently supports only int data type, but plans to support other basic types.
|
||||
|
||||
### Join Support
|
||||
Tool supports following joins:
|
||||
```yaml
|
||||
targetJoinTypes:
|
||||
- INNER
|
||||
- LEFT
|
||||
- RIGHT
|
||||
- FULL
|
||||
```
|
||||
|
||||
### Citus Table Support
|
||||
Tool supports following citus table types:
|
||||
```yaml
|
||||
targetTables:
|
||||
- Table:
|
||||
...
|
||||
citusType: <one of (DISTRIBUTED || REFERENCE || POSTGRES)>
|
||||
...
|
||||
```
|
||||
|
||||
### Restrict Operation Support
|
||||
Tool supports following restrict operations:
|
||||
```yaml
|
||||
targetRestrictOps:
|
||||
- LT
|
||||
- GT
|
||||
- EQ
|
||||
```
|
||||
|
||||
### Rte Support
|
||||
Tool supports following rtes:
|
||||
```yaml
|
||||
targetRteTypes:
|
||||
- RELATION
|
||||
- SUBQUERY
|
||||
- CTE
|
||||
- VALUES
|
||||
```
|
||||
|
||||
## How to Generate Queries?
|
||||
You have 2 different options.
|
||||
|
||||
- [Interactive Mode](#interactive-mode)
|
||||
- [File Mode](#file-mode)
|
||||
|
||||
### Interactive Mode
|
||||
In this mode, you will be prompted to continue generating a query. When you press `Enter`, it will continue generating queries.
|
||||
Press `x` to exit.
|
||||
|
||||
1. Configure `interactiveMode: true` in [config.yaml](./config/config.yaml),
|
||||
2. Run the command shown below
|
||||
```bash
|
||||
cd src/test/regress/citus_tests/query_generator
|
||||
python generate_queries.py
|
||||
```
|
||||
|
||||
### File Mode
|
||||
In this mode, generated ddls and queries will be written into the files configured in [config.yml](./config/config.yaml).
|
||||
|
||||
1. Configure `interactiveMode: false`,
|
||||
2. Configure `queryCount: <total_query>`,
|
||||
3. Configure `queryOutFile: <query_file_path>` and `ddlOutFile: <ddlfile_path>`
|
||||
4. Run the command shown below
|
||||
```bash
|
||||
cd src/test/regress/citus_tests/query_generator
|
||||
python generate_queries.py
|
||||
```
|
|
@ -0,0 +1,60 @@
|
|||
#!/bin/bash

# Compares results of the generated queries between Citus (distributed)
# tables and plain Postgres (undistributed) tables.
# Usage: citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port>

# make bash behave
set -euo pipefail

psql_user=$1
psql_db=$2
psql_port=$3

runDDLs()
{
    # run ddls
    psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" -f "${out_folder}"/ddls.sql > /dev/null
}

runUndistributeTables()
{
    undistribute_all_tables_command='SELECT undistribute_table(logicalrelid) FROM pg_dist_partition;'
    # run undistribute all tables
    psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" -c "${undistribute_all_tables_command}" > /dev/null
}

runQueries()
{
    # $1: file to capture query output (stdout and stderr) into
    out_filename=$1

    # run dmls
    # echo queries and comments for query tracing
    psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" \
        --echo-all \
        -f "${out_folder}"/queries.sql > "${out_filename}" 2>&1
}

showDiffs()
{
    # diff-checker.py must run from its own directory (it uses relative paths)
    pushd . && cd "${script_folder}" && python3 diff-checker.py && popd
}

# run query generator and let it create output ddls and queries
script_folder=$(dirname "$0")
out_folder="${script_folder}"/../out
pushd . && cd "${script_folder}"/.. && python3 generate_queries.py && popd

# remove result files if exists
rm -rf "${out_folder}"/dist_queries.out "${out_folder}"/local_queries.out

# run ddls
runDDLs

# runs dmls for distributed tables
runQueries "${out_folder}"/dist_queries.out

# undistribute all dist tables
runUndistributeTables

# runs the same dmls for pg local tables
runQueries "${out_folder}"/local_queries.out

# see diffs in results
showDiffs
|
|
@ -0,0 +1,122 @@
|
|||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def createPatternForFailedQueryBlock(acceptableErrors):
    """Build a regex matching one failed query block in a psql output file.

    A failed block looks like:
        -- queryId: <id>
        <query text>
        psql:...<one of the acceptable error messages>

    acceptableErrors: list of error-message regex fragments, joined into a
    single non-capturing alternation group.
    """
    # str.join replaces the original manual index loop that appended "|"
    # separators between alternatives.
    return "-- queryId: [0-9]+\n.*\npsql:.*(?:" + "|".join(acceptableErrors) + ")"
|
||||
|
||||
|
||||
def findFailedQueriesFromFile(queryOutFile, acceptableErrors):
    """Return the ids of queries in queryOutFile that failed with one of
    the acceptable error messages."""
    blockPattern = createPatternForFailedQueryBlock(acceptableErrors)
    idPattern = "queryId: ([0-9]+)"

    with open(queryOutFile, "r") as outFile:
        fileContent = outFile.read()

    failedBlocks = re.findall(blockPattern, fileContent)
    return [re.search(idPattern, block)[1] for block in failedBlocks]
|
||||
|
||||
|
||||
def removeFailedQueryOutputFromFile(outFile, failedQueryIds):
    """Rewrite outFile in place, dropping the output blocks of the given
    failed queries.

    failedQueryIds must be ordered as the queries appear in the file.  A
    block starts at its '-- queryId: N' line and runs until the next query
    id line (which belongs to a successful query and is kept).
    """
    if not failedQueryIds:
        return

    with open(outFile, "r") as srcFile:
        originalLines = srcFile.readlines()

    idPattern = "queryId: ([0-9]+)"
    skipping = False
    pendingIdx = 0
    pendingId = failedQueryIds[pendingIdx]

    with open(outFile, "w") as dstFile:
        for currentLine in originalLines:
            idMatch = re.search(idPattern, currentLine)
            # line carries a query id
            if idMatch:
                if pendingId == idMatch[1]:
                    # start of the next failed query's block: skip lines
                    # until we reach a successful query
                    skipping = True
                    pendingIdx += 1
                    if pendingIdx < len(failedQueryIds):
                        pendingId = failedQueryIds[pendingIdx]
                else:
                    # a successful query begins here; stop skipping
                    skipping = False

            if not skipping:
                dstFile.write(currentLine)
    return
|
||||
|
||||
|
||||
def removeFailedQueryOutputFromFiles(distQueryOutFile, localQueryOutFile):
    """Strip acceptably-failed distributed queries from both output files so
    known-unsupported queries do not pollute the diff.

    Some generated queries fail with the errors below due to
    https://github.com/citusdata/citus/issues/6653; we skip those until we
    support them.
    """
    acceptableErrors = [
        "ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns",
        "ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns",
    ]
    failedDistQueryIds = findFailedQueriesFromFile(distQueryOutFile, acceptableErrors)
    # remove the failed blocks from BOTH files so the results stay comparable
    for outputFile in (distQueryOutFile, localQueryOutFile):
        removeFailedQueryOutputFromFile(outputFile, failedDistQueryIds)
    return
|
||||
|
||||
|
||||
def showDiffs(distQueryOutFile, localQueryOutFile, diffFile):
    """Write a unified diff of local vs. distributed query outputs into
    diffFile and return diff's exit code (0 means no differences).
    """
    with open(diffFile, "w") as f:
        # Pass the arguments as a list instead of .split()ting a formatted
        # string: the original broke on file paths containing spaces.
        process = subprocess.run(
            ["diff", "-u", localQueryOutFile, distQueryOutFile],
            stdout=f,
            stderr=f,
        )
        exitCode = process.returncode

    print("diff exit {}".format(exitCode))
    return exitCode
|
||||
|
||||
|
||||
def exitIfAnyLocalQueryFailed(localQueryOutFile):
    """Abort (AssertionError) if any query failed on the local run.

    Local (undistributed) queries are plain Postgres; a failure there points
    at a query-generator bug or a Postgres bug, never at Citus.
    """
    # any ERROR at all counts — no acceptable-error allowlist for local runs
    allErrors = ["ERROR:"]
    failedLocalQueryIds = findFailedQueriesFromFile(localQueryOutFile, allErrors)
    assert (
        len(failedLocalQueryIds) == 0
    ), """There might be an internal error related to query generator or
    we might find a Postgres bug. Check local_queries.out to see the error."""
    return
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # resolve all paths relative to this script so the tool can be invoked
    # from any working directory
    scriptDirPath = os.path.dirname(os.path.abspath(__file__))
    outFolderPath = scriptDirPath + "/../out"
    localQueryOutFile = "{}/local_queries.out".format(outFolderPath)
    distQueryOutFile = "{}/dist_queries.out".format(outFolderPath)
    diffFile = "{}/local_dist.diffs".format(outFolderPath)

    # exit if we have any error from local queries
    exitIfAnyLocalQueryFailed(localQueryOutFile)

    # find failed queries from distQueryOutFile and then remove failed queries and their results from
    # both distQueryOutFile and localQueryOutFile
    removeFailedQueryOutputFromFiles(distQueryOutFile, localQueryOutFile)

    # show diffs in unified format
    exitCode = showDiffs(distQueryOutFile, localQueryOutFile, diffFile)

    # propagate diff's exit code: non-zero fails the CI job
    sys.exit(exitCode)
|
|
@ -0,0 +1,63 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""query_gen_test
|
||||
Usage:
|
||||
run_query_compare_test --bindir=<bindir> --pgxsdir=<pgxsdir>
|
||||
|
||||
Options:
|
||||
--bindir=<bindir> PostgreSQL executable directory(ex: '~/.pgenv/pgsql-10.4/bin')
|
||||
--pgxsdir=<pgxsdir> Path to the PGXS directory(ex: ~/.pgenv/src/postgresql-11.3)
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from docopt import docopt
|
||||
|
||||
# https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time/14132912#14132912
|
||||
sys.path.append(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
)
|
||||
|
||||
|
||||
# ignore E402 because these imports require addition to path
|
||||
import common # noqa: E402
|
||||
|
||||
import config as cfg # noqa: E402
|
||||
|
||||
|
||||
def run_test(config):
    """Spin up a Citus cluster, run the query-generator comparison test on
    it, tear the cluster down, and exit with the test's return code.

    config: a cluster configuration object (e.g.
    CitusSuperUserDefaultClusterConfig) providing bindir/datadir/settings,
    user, dbname and node ports.
    """
    # start cluster
    common.initialize_temp_dir(cfg.CITUS_ARBITRARY_TEST_DIR)
    common.initialize_citus_cluster(
        config.bindir, config.datadir, config.settings, config
    )

    # run test
    scriptDirPath = os.path.dirname(os.path.abspath(__file__))
    testRunCommand = "bash {}/citus_compare_dist_local_joins.sh {} {} {}".format(
        scriptDirPath, config.user, config.dbname, config.coordinator_port()
    )
    process = subprocess.Popen(
        testRunCommand.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout, stderr = process.communicate()

    # stop cluster before reporting, so it is torn down even on test failure
    common.stop_databases(
        config.bindir, config.datadir, config.node_name_to_ports, config.name
    )

    # surface the test's output and propagate its exit code to CI
    print(stdout)
    print(stderr)
    print(process.returncode)
    sys.exit(process.returncode)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # parse --bindir/--pgxsdir from the docstring usage spec and build a
    # superuser cluster config from them
    citusClusterConfig = cfg.CitusSuperUserDefaultClusterConfig(
        docopt(__doc__, version="run_query_compare_test")
    )

    run_test(citusClusterConfig)
|
|
@ -0,0 +1,129 @@
|
|||
import copy
|
||||
|
||||
import yaml
|
||||
from config.config_parser import (
|
||||
parseJoinTypeArray,
|
||||
parseRange,
|
||||
parseRestrictOpArray,
|
||||
parseRteTypeArray,
|
||||
parseTableArray,
|
||||
)
|
||||
from node_defs import CitusType
|
||||
|
||||
|
||||
class Config:
    """Parsed representation of config/config.yaml.

    Holds the table definitions, the allowed join/rte/restriction types and
    all numeric knobs (query count, value ranges, rte/cte limits) that drive
    query generation.
    """

    def __init__(self):
        configObj = Config.parseConfigFile("config/config.yaml")

        # each configured table is expanded into `distinctCopyCount` copies
        self.targetTables = _distinctCopyTables(
            parseTableArray(configObj["targetTables"])
        )
        self.targetJoinTypes = parseJoinTypeArray(configObj["targetJoinTypes"])
        self.targetRteTypes = parseRteTypeArray(configObj["targetRteTypes"])
        self.targetRestrictOps = parseRestrictOpArray(configObj["targetRestrictOps"])
        self.commonColName = configObj["commonColName"]
        self.targetRteCount = configObj["targetRteCount"]
        self.targetCteCount = configObj["targetCteCount"]
        self.targetCteRteCount = configObj["targetCteRteCount"]
        self.semiAntiJoin = configObj["semiAntiJoin"]
        self.cartesianProduct = configObj["cartesianProduct"]
        self.limit = configObj["limit"]
        self.orderby = configObj["orderby"]
        self.forceOrderbyWithLimit = configObj["forceOrderbyWithLimit"]
        self.useAvgAtTopLevelTarget = configObj["useAvgAtTopLevelTarget"]
        self.interactiveMode = configObj["interactiveMode"]
        self.queryOutFile = configObj["queryOutFile"]
        self.ddlOutFile = configObj["ddlOutFile"]
        self.queryCount = configObj["queryCount"]
        self.dataRange = parseRange(configObj["dataRange"])
        self.filterRange = parseRange(configObj["filterRange"])
        self.limitRange = parseRange(configObj["limitRange"])
        self._ensureRteLimitsAreSane()

    def __repr__(self):
        rep = "targetRteCount: {}\n".format(self.targetRteCount)
        rep += "targetCteCount: {}\n".format(self.targetCteCount)
        rep += "targetCteRteCount: {}\n".format(self.targetCteRteCount)

        rep += "targetRteTypes:\n"
        for rteType in self.targetRteTypes:
            rep += "\t{}\n".format(rteType)

        rep += "targetJoinTypes:\n"
        for joinType in self.targetJoinTypes:
            rep += "\t{}\n".format(joinType)

        rep += "restrictOps:\n"
        for restrictOp in self.targetRestrictOps:
            rep += "\t{}\n".format(restrictOp)

        return rep

    def _ensureRteLimitsAreSane(self):
        # the generator cannot place more rtes into a query than the
        # per-table usage caps allow in total
        totalAllowedUseForAllTables = 0
        for table in self.targetTables:
            totalAllowedUseForAllTables += table.maxAllowedUseOnQuery
        assert (
            totalAllowedUseForAllTables >= self.targetRteCount
        ), """targetRteCount cannot be greater than sum of maxAllowedUseOnQuery for all tables.
        Set targetRteCount to a lower value or increase maxAllowedUseOnQuery for tables at config.yml."""

    @staticmethod
    def parseConfigFile(path):
        """Load the yaml config at `path` and return it as a dict.

        Raises RuntimeError when the file cannot be opened.
        """
        try:
            with open(path, "r") as configFile:
                return yaml.load(configFile, yaml.Loader)
        except IOError as e:
            # fixed: the original mixed an f-string with str.format
            # (f"I/O error({0}): {1}".format(...)), which printed the
            # literal text "I/O error(0): 1" instead of the real details
            print(f"I/O error({e.errno}): {e.strerror}")
            raise RuntimeError("cannot parse config.yaml") from e
        except Exception:
            # re-raise instead of implicitly returning None, which made
            # Config.__init__ crash later with a confusing TypeError
            print("Unexpected error while parsing config.yaml.")
            raise
|
||||
|
||||
|
||||
_config = None
|
||||
|
||||
|
||||
def resetConfig():
    """(Re)load config/config.yaml into the module-level Config singleton."""
    global _config
    _config = Config()
|
||||
|
||||
|
||||
def getConfig():
    """Return the Config singleton loaded by resetConfig() (None before that)."""
    return _config
|
||||
|
||||
|
||||
def getAllTableNames():
    """Return the names of all target tables defined in the config."""
    return [targetTable.name for targetTable in getConfig().targetTables]
|
||||
|
||||
|
||||
def getMaxAllowedCountForTable(tableName):
    """Return maxAllowedUseOnQuery for the unique config table named tableName."""
    matches = [tbl for tbl in getConfig().targetTables if tbl.name == tableName]
    # table names are unique after _distinctCopyTables suffixing
    assert len(matches) == 1
    return matches[0].maxAllowedUseOnQuery
|
||||
|
||||
|
||||
def isTableDistributed(table):
    """True when the table is configured as a Citus distributed table."""
    return table.citusType == CitusType.DISTRIBUTED
|
||||
|
||||
|
||||
def isTableReference(table):
    """True when the table is configured as a Citus reference table."""
    return table.citusType == CitusType.REFERENCE
|
||||
|
||||
|
||||
def _distinctCopyTables(tables):
|
||||
distinctCopyTables = []
|
||||
for table in tables:
|
||||
distinctCopyCount = table.distinctCopyCount
|
||||
for tblIdx in range(1, distinctCopyCount):
|
||||
distinctCopyTable = copy.deepcopy(table)
|
||||
distinctCopyTable.name += str(tblIdx)
|
||||
distinctCopyTables.append(distinctCopyTable)
|
||||
table.name += "0"
|
||||
tables.extend(distinctCopyTables)
|
||||
return tables
|
|
@ -0,0 +1,65 @@
|
|||
interactiveMode: false
|
||||
queryCount: 250
|
||||
queryOutFile: queries.sql
|
||||
ddlOutFile: ddls.sql
|
||||
semiAntiJoin: true
|
||||
cartesianProduct: false
|
||||
limit: true
|
||||
orderby: true
|
||||
forceOrderbyWithLimit: true
|
||||
useAvgAtTopLevelTarget: true
|
||||
dataRange:
|
||||
from: 0
|
||||
to: 10
|
||||
filterRange:
|
||||
from: 0
|
||||
to: 10
|
||||
limitRange:
|
||||
from: 0
|
||||
to: 10
|
||||
targetRteCount: 40
|
||||
targetCteCount: 3
|
||||
targetCteRteCount: 2
|
||||
|
||||
commonColName: id
|
||||
|
||||
targetTables:
|
||||
- Table:
|
||||
name: dist
|
||||
citusType: DISTRIBUTED
|
||||
maxAllowedUseOnQuery: 10
|
||||
rowCount: 10
|
||||
nullRate: 0.1
|
||||
duplicateRate: 0.1
|
||||
columns:
|
||||
- Column:
|
||||
name: id
|
||||
type: int
|
||||
distinctCopyCount: 2
|
||||
- Table:
|
||||
name: ref
|
||||
citusType: REFERENCE
|
||||
maxAllowedUseOnQuery: 10
|
||||
rowCount: 10
|
||||
nullRate: 0.1
|
||||
duplicateRate: 0.1
|
||||
columns:
|
||||
- Column:
|
||||
name: id
|
||||
type: int
|
||||
distinctCopyCount: 2
|
||||
|
||||
targetJoinTypes:
|
||||
- INNER
|
||||
- LEFT
|
||||
- RIGHT
|
||||
- FULL
|
||||
|
||||
targetRteTypes:
|
||||
- RELATION
|
||||
- SUBQUERY
|
||||
- CTE
|
||||
|
||||
targetRestrictOps:
|
||||
- LT
|
||||
- GT
|
|
@ -0,0 +1,81 @@
|
|||
from node_defs import CitusType, Column, JoinType, RestrictOp, RTEType, Table
|
||||
|
||||
|
||||
def parseJoinType(joinTypeText):
    """Map a join type name from config (e.g. "INNER") to a JoinType member."""
    return JoinType[joinTypeText]
|
||||
|
||||
|
||||
def parseJoinTypeArray(joinTypeTexts):
    """Map each configured join type name to its JoinType enum member."""
    # comprehension replaces the manual accumulate-and-append loop
    return [parseJoinType(joinTypeText) for joinTypeText in joinTypeTexts]
|
||||
|
||||
|
||||
def parseRteType(rteTypeText):
    """Map an rte type name from config (e.g. "RELATION") to an RTEType member."""
    return RTEType[rteTypeText]
|
||||
|
||||
|
||||
def parseRteTypeArray(rteTypeTexts):
    """Map each configured rte type name to its RTEType enum member."""
    # comprehension replaces the manual accumulate-and-append loop
    return [parseRteType(rteTypeText) for rteTypeText in rteTypeTexts]
|
||||
|
||||
|
||||
def parseRestrictOp(restrictOpText):
    """Map a restriction op name from config (e.g. "LT") to a RestrictOp member."""
    return RestrictOp[restrictOpText]
|
||||
|
||||
|
||||
def parseRestrictOpArray(restrictOpTexts):
    """Map each configured restriction op name to its RestrictOp enum member."""
    # comprehension replaces the manual accumulate-and-append loop
    return [parseRestrictOp(restrictOpText) for restrictOpText in restrictOpTexts]
|
||||
|
||||
|
||||
def parseTable(targetTableDict):
    """Build a Table from one 'Table' entry of config.yaml."""
    return Table(
        targetTableDict["name"],
        CitusType[targetTableDict["citusType"]],
        targetTableDict["maxAllowedUseOnQuery"],
        targetTableDict["rowCount"],
        targetTableDict["nullRate"],
        targetTableDict["duplicateRate"],
        [parseColumn(columnDict) for columnDict in targetTableDict["columns"]],
        targetTableDict["distinctCopyCount"],
    )
|
||||
|
||||
|
||||
def parseTableArray(targetTableDicts):
    """Build a Table for each 'Table' entry in the config's targetTables list."""
    # comprehension replaces the manual accumulate-and-append loop
    return [parseTable(entry["Table"]) for entry in targetTableDicts]
|
||||
|
||||
|
||||
def parseColumn(targetColumnDict):
    """Build a Column from one 'Column' entry of config.yaml (keys: name, type)."""
    name = targetColumnDict["name"]
    # renamed local to avoid shadowing the builtin `type`
    columnType = targetColumnDict["type"]
    return Column(name, columnType)
|
||||
|
||||
|
||||
def parseRange(rangeDict):
    """Convert a {'from': a, 'to': b} config mapping into an (a, b) tuple."""
    return (rangeDict["from"], rangeDict["to"])
|
|
@ -0,0 +1,81 @@
|
|||
from node_defs import CitusType
|
||||
|
||||
from config.config import getConfig
|
||||
|
||||
|
||||
def getTableData():
    """Return the SQL INSERT statements that populate every target table.

    For each table three kinds of rows are generated:
      - overlapping rows: the same [from, from+rowCount] integer range for
        every table, so joins produce matches,
      - non-overlapping rows: a per-table disjoint range (offset by the
        table's index), so outer joins also produce non-matching rows,
      - NULL and duplicate rows, sized by nullRate/duplicateRate.
    """
    dataGenerationSql = ""

    tableIdx = 1
    (fromVal, toVal) = getConfig().dataRange
    tables = getConfig().targetTables
    for table in tables:
        # generate base rows
        dataGenerationSql += _genOverlappingData(table.name, fromVal, table.rowCount)
        dataGenerationSql += _genNonOverlappingData(table.name, toVal, tableIdx)
        dataGenerationSql += "\n"

        # generate null rows
        # NOTE(review): NULLs are skipped for DISTRIBUTED tables — presumably
        # because the single column is the distribution column, which cannot
        # be NULL; confirm.
        if not table.citusType == CitusType.DISTRIBUTED:
            targetNullRows = int(table.rowCount * table.nullRate)
            dataGenerationSql += _genNullData(table.name, targetNullRows)
            dataGenerationSql += "\n"

        # generate duplicate rows
        targetDuplicateRows = int(table.rowCount * table.duplicateRate)
        dataGenerationSql += _genDupData(table.name, targetDuplicateRows)
        dataGenerationSql += "\n\n"
        tableIdx += 1
    return dataGenerationSql
|
||||
|
||||
|
||||
def _genOverlappingData(tableName, startVal, rowCount):
|
||||
"""returns string to fill table with [startVal,startVal+rowCount] range of integers"""
|
||||
dataGenerationSql = ""
|
||||
dataGenerationSql += "INSERT INTO " + tableName
|
||||
dataGenerationSql += (
|
||||
" SELECT i FROM generate_series("
|
||||
+ str(startVal)
|
||||
+ ","
|
||||
+ str(startVal + rowCount)
|
||||
+ ") i;"
|
||||
)
|
||||
return dataGenerationSql
|
||||
|
||||
|
||||
def _genNullData(tableName, nullCount):
|
||||
"""returns string to fill table with NULLs"""
|
||||
dataGenerationSql = ""
|
||||
dataGenerationSql += "INSERT INTO " + tableName
|
||||
dataGenerationSql += (
|
||||
" SELECT NULL FROM generate_series(0," + str(nullCount) + ") i;"
|
||||
)
|
||||
return dataGenerationSql
|
||||
|
||||
|
||||
def _genDupData(tableName, dupRowCount):
    """returns string to fill table with duplicate integers which are fetched from given table"""
    commonCol = getConfig().commonColName
    return (
        f"INSERT INTO {tableName}"
        f" SELECT * FROM {tableName} ORDER BY {commonCol} LIMIT {dupRowCount};"
    )
|
||||
|
||||
|
||||
def _genNonOverlappingData(tableName, startVal, tableIdx):
|
||||
"""returns string to fill table with different integers for given table"""
|
||||
startVal = startVal + tableIdx * 20
|
||||
endVal = startVal + 20
|
||||
dataGenerationSql = ""
|
||||
dataGenerationSql += "INSERT INTO " + tableName
|
||||
dataGenerationSql += (
|
||||
" SELECT i FROM generate_series(" + str(startVal) + "," + str(endVal) + ") i;"
|
||||
)
|
||||
return dataGenerationSql
|
|
@ -0,0 +1,48 @@
|
|||
from config.config import getConfig, isTableDistributed, isTableReference
|
||||
|
||||
|
||||
def getTableDDLs():
    """Return DROP/CREATE (+ Citus distribution) DDL for every configured table."""
    return "".join(
        _genTableDDL(table) + "\n" for table in getConfig().targetTables
    )
|
||||
|
||||
|
||||
def _genTableDDL(table):
    """Return DROP + CREATE statements for one table, plus the
    create_distributed_table/create_reference_table call when applicable."""
    columnDefs = ",\n".join(_genColumnDDL(column) for column in table.columns)

    ddl = f"DROP TABLE IF EXISTS {table.name};\n"
    ddl += f"CREATE TABLE {table.name}({columnDefs});\n"

    if isTableDistributed(table):
        ddl += (
            f"SELECT create_distributed_table("
            f"'{table.name}','{getConfig().commonColName}');\n"
        )
    elif isTableReference(table):
        ddl += f"SELECT create_reference_table('{table.name}');\n"
    # POSTGRES tables get no extra call
    return ddl
|
||||
|
||||
|
||||
def _genColumnDDL(column):
|
||||
ddl = ""
|
||||
ddl += column.name
|
||||
ddl += " "
|
||||
ddl += column.type
|
||||
return ddl
|
|
@ -0,0 +1,65 @@
|
|||
import signal
|
||||
import sys
|
||||
|
||||
from data_gen import getTableData
|
||||
from ddl_gen import getTableDDLs
|
||||
from query_gen import newQuery
|
||||
|
||||
from config.config import getConfig, resetConfig
|
||||
|
||||
|
||||
def _signal_handler(sig, frame):
    """Exit cleanly on SIGINT instead of dumping a KeyboardInterrupt trace."""
    sys.exit(0)
|
||||
|
||||
|
||||
def _interactiveMode(ddls, data):
    """Print the DDLs and data once, then generate one query per Enter press
    until the user types 'x'."""
    print(ddls)
    print(data)

    while True:
        res = input("Press x to exit or Enter to generate more")
        if res.lower() == "x":
            print("Exit from query generation mode!")
            sys.exit(0)

        query = newQuery()
        print(query)

        # reload the config so per-table usage counters start fresh for the
        # next generated query
        resetConfig()
|
||||
|
||||
|
||||
def _fileMode(ddls, data):
    """Write DDLs+data to out/<ddlOutFile> and queryCount generated queries
    (each tagged with '-- queryId: N') to out/<queryOutFile>."""
    ddlFileName = "out/" + getConfig().ddlOutFile
    with open(ddlFileName, "w") as ddlFile:
        ddlFile.writelines([ddls, data])

    queryCount = getConfig().queryCount
    fileName = "out/" + getConfig().queryOutFile
    with open(fileName, "w") as f:
        # enable repartition joins due to https://github.com/citusdata/citus/issues/6865
        enableRepartitionJoinCommand = "SET citus.enable_repartition_joins TO on;\n"
        queryLines = [enableRepartitionJoinCommand]
        queryId = 1
        for _ in range(queryCount):
            query = newQuery()

            # tag each query so diff-checker.py can locate its output block
            queryLine = "-- queryId: " + str(queryId) + "\n"
            queryLine += query + "\n\n"

            queryLines.append(queryLine)
            queryId += 1
        f.writelines(queryLines)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # exit gracefully on Ctrl-C
    signal.signal(signal.SIGINT, _signal_handler)

    # load config/config.yaml before any generation
    resetConfig()

    ddls = getTableDDLs()
    data = getTableData()

    if getConfig().interactiveMode:
        _interactiveMode(ddls, data)
    else:
        _fileMode(ddls, data)
|
|
@ -0,0 +1,55 @@
|
|||
from enum import Enum
|
||||
|
||||
|
||||
# JOIN variants the generator may emit; member names are interpolated
# directly into the generated SQL (e.g. "INNER JOIN"). Values are 1..4.
JoinType = Enum("JoinType", ["INNER", "LEFT", "RIGHT", "FULL"])
|
||||
|
||||
|
||||
# kinds of range table entries the generator can produce. Values are 1..4.
RTEType = Enum("RTEType", ["RELATION", "SUBQUERY", "CTE", "VALUES"])
|
||||
|
||||
|
||||
# comparison operators usable in generated WHERE restrictions. Values are 1..3.
RestrictOp = Enum("RestrictOp", ["LT", "GT", "EQ"])
|
||||
|
||||
|
||||
# how a generated table is created on the Citus side. Values are 1..3.
CitusType = Enum("CitusType", ["DISTRIBUTED", "REFERENCE", "POSTGRES"])
|
||||
|
||||
|
||||
class Table:
    """Metadata describing one generated table.

    Attributes:
        name: base table name used in DDL and generated queries.
        citusType: CitusType member (distributed / reference / plain postgres).
        maxAllowedUseOnQuery: max number of times one query may reference the table.
        rowCount: number of rows to generate for the table.
        nullRate: fraction of generated values that are NULL.
        duplicateRate: fraction of generated values that are duplicates.
        columns: list of Column objects describing the schema.
        distinctCopyCount: how many distinct copies of the table are created.
    """

    def __init__(
        self,
        name,
        citusType,
        maxAllowedUseOnQuery,
        rowCount,
        nullRate,
        duplicateRate,
        columns,
        distinctCopyCount,
    ):
        self.name = name
        self.citusType = citusType
        self.maxAllowedUseOnQuery = maxAllowedUseOnQuery
        self.rowCount = rowCount
        self.nullRate = nullRate
        self.duplicateRate = duplicateRate
        self.columns = columns
        self.distinctCopyCount = distinctCopyCount

    def __repr__(self):
        # compact debug representation; columns are elided to keep it short
        return (
            f"Table(name={self.name!r}, citusType={self.citusType!r}, "
            f"rowCount={self.rowCount})"
        )
|
||||
|
||||
|
||||
class Column:
    """A (name, type) pair describing one column of a generated table."""

    def __init__(self, name, type):
        # NOTE: 'type' shadows the builtin, but the parameter name is part of
        # the constructor interface and is kept for compatibility
        self.name = name
        self.type = type

    def __repr__(self):
        return f"Column(name={self.name!r}, type={self.type!r})"
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
|
@ -0,0 +1,434 @@
|
|||
import random
|
||||
|
||||
from node_defs import RTEType
|
||||
from random_selections import (
|
||||
randomJoinOp,
|
||||
randomRestrictOp,
|
||||
randomRteType,
|
||||
shouldSelectThatBranch,
|
||||
)
|
||||
|
||||
from config.config import getAllTableNames, getConfig, getMaxAllowedCountForTable
|
||||
|
||||
# query_gen.py generates a new query from grammar rules below. It randomly chooses allowed rules
|
||||
# to generate a query. Here are some important notes about the query generation:
|
||||
#
|
||||
# - We enforce the max allowed # of usage for each table. It also enforces global total # of tables.
|
||||
# - Table names, restriction types and any other selections are chosen between the values provided by
|
||||
# configuration file (config.yaml).
|
||||
# - Entry point for the generator is newQuery and all other methods are internal methods. We pass a context
|
||||
# object named GeneratorContext to all internal methods as parameter to perform checks and generations
|
||||
# on the query via the context.
|
||||
# - shouldSelectThatBranch() is a useful utility method to randomly choose a grammar rule. Some of the rules
|
||||
# are only selected if we allowed them in configuration file.
|
||||
# - We enforce table limits separately if we are inside cte part of the query(see targetCteRteCount).
|
||||
# We also enforce max # of ctes for a query.
|
||||
# - commonColName from the config is used at select and where clauses.
|
||||
# - useAvgAtTopLevelTarget is useful to return single row as query result. It is also useful to track Citus
|
||||
# query bugs via run_query_compare_test.py.
|
||||
# - '=' restriction is removed from the config by default to return values different than null most of the time.
|
||||
# - 'RTE.VALUES' is also removed from the config for the same reason as above.
|
||||
# - Filter range is selected as same with data range for the same reason as above.
|
||||
# - aliasStack at GeneratorContext is useful to put correct table names into where clause.
|
||||
|
||||
# grammar syntax
|
||||
#
|
||||
# ======Assumptions======
|
||||
# 1. Tables have a common dist col
|
||||
# 2. All operations execute on common column for all tables.
|
||||
#
|
||||
# TODO: RTE_FUNCTION, RTE_TABLEFUNC
|
||||
#
|
||||
# ====SYNTAX====
|
||||
# ===Nonterminals===
|
||||
# Query
|
||||
# SelectExpr
|
||||
# FromExpr
|
||||
# RestrictExpr
|
||||
# RteList
|
||||
# Rte
|
||||
# SubqueryRte
|
||||
# RelationRte
|
||||
# JoinList
|
||||
# JoinOp
|
||||
# Using
|
||||
# RestrictList
|
||||
# Restrict
|
||||
# CteRte
|
||||
# CteList
|
||||
# Cte
|
||||
# ValuesRte
|
||||
# Limit
|
||||
# OrderBy
|
||||
#
|
||||
# ===Terminals===
|
||||
# e 'SELECT' 'FROM' 'INNER JOIN' 'LEFT JOIN' 'RIGHT JOIN' 'FULL JOIN' 'WHERE' 'LIMIT' 'USING' 'WITH'
|
||||
# 'ORDER BY' 'VALUES' 'IN' 'NOT' 'AS' '<' '>' '=' '*' ',' ';' '(' ')'
|
||||
#
|
||||
# ===Rules===
|
||||
# Start -> Query ';' || 'WITH' CteList Query ';'
|
||||
# Query -> SelectExpr FromExpr [OrderBy] [Limit] || 'SELECT' 'avg(avgsub.DistColName)' 'FROM' SubqueryRte 'AS avgsub'
|
||||
# SelectExpr -> 'SELECT' 'curAlias()' '.' DistColName
|
||||
# FromExpr -> 'FROM' (Rte JoinList JoinOp Rte Using || RteList) ['WHERE' 'nextRandomAlias()' '.' DistColName RestrictExpr]
|
||||
# RestrictExpr -> ('<' || '>' || '=') Int || ['NOT'] 'IN' SubqueryRte
|
||||
# JoinList -> JoinOp Rte Using JoinList || e
|
||||
# Using -> 'USING' '(' DistColName ')'
|
||||
# RteList -> Rte [, RteList] || Rte
|
||||
# Rte -> SubqueryRte 'AS' 'nextRandomAlias()' || RelationRte 'AS' 'nextRandomAlias()' ||
|
||||
# CteRte 'AS' 'nextRandomAlias()' || ValuesRte 'AS' 'nextRandomAlias()'
|
||||
# SubqueryRte -> '(' Query ')'
|
||||
# RelationRte -> 'nextRandomTableName()'
|
||||
# CteRte -> 'randomCteName()'
|
||||
# CteList -> Cte [',' CteList] || Cte
|
||||
# Cte -> 'nextRandomAlias()' 'AS' '(' Query ')'
|
||||
# ValuesRte -> '(' 'VALUES' '(' 'random()' ')' ')'
|
||||
# JoinOp -> 'INNER JOIN' || 'LEFT JOIN' || 'RIGHT JOIN' || 'FULL JOIN'
|
||||
# Limit -> 'LIMIT' 'random()'
|
||||
# OrderBy -> 'ORDER BY' DistColName
|
||||
# DistColName -> 'hardwired(get from config)'
|
||||
|
||||
|
||||
class GeneratorContext:
    """context to store some variables which should be available during generation"""

    def __init__(self):
        # each level's last table is used in WHERE clause for the level
        self.aliasStack = []
        # tracks if we are inside cte as we should not refer cte inside cte
        self.insideCte = False
        # total rtes in cte + non-cte parts; also drives alias numbering
        self.totalRteCount = 0
        # rte count in non-cte part to enforce non-cte rte limit
        self.currentRteCount = 0
        # cte count to enforce cte limit
        self.currentCteCount = 0
        # rte count in cte part to enforce rte limit in cte
        self.currentCteRteCount = 0
        # rte counts per table to enforce rte limit per table
        self.perTableRtes = {}
        # tables which hit count limit
        self.disallowedTables = set()
        # useful to track usage avg only at first select
        self.usedAvg = False

    def randomCteName(self):
        """returns a randomly selected cte name (callers must ensure at
        least one cte exists, otherwise randint(0, -1) raises)"""
        randCteRef = random.randint(0, self.currentCteCount - 1)
        return " cte_" + str(randCteRef)

    def curAlias(self):
        """returns current alias name to be used for the current table"""
        return " table_" + str(self.totalRteCount)

    def curCteAlias(self):
        """returns current alias name to be used for the current cte"""
        return " cte_" + str(self.currentCteCount)

    def hasAnyCte(self):
        """returns if context has any cte"""
        return self.currentCteCount > 0

    def canGenerateNewRte(self):
        """checks if context exceeds allowed rte count"""
        return self.currentRteCount < getConfig().targetRteCount

    def canGenerateNewCte(self):
        """checks if context exceeds allowed cte count"""
        return self.currentCteCount < getConfig().targetCteCount

    def canGenerateNewRteInsideCte(self):
        """checks if context exceeds allowed rte count inside a cte"""
        return self.currentCteRteCount < getConfig().targetCteRteCount

    def addAlias(self, alias):
        """adds given alias to the stack"""
        self.aliasStack.append(alias)

    def removeLastAlias(self):
        """removes the latest added alias from the stack"""
        return self.aliasStack.pop()

    def getRteNameEnforcingRteLimits(self):
        """returns rteName by enforcing rte count limits for tables"""
        # do not enforce per table rte limit if we are inside cte
        if self.insideCte:
            rteName = random.choice(getAllTableNames())
            return " " + rteName + " "

        while True:
            # keep trying to find random table by eliminating the ones which hit table limit
            allowedNames = set(getAllTableNames()) - self.disallowedTables
            # NOTE(review): assumes the config always leaves at least one
            # usable table; this assert is stripped under `python -O` — verify
            assert len(allowedNames) > 0
            rteName = random.choice(list(allowedNames))

            # not yet added to rte count map, so we can allow the name
            if rteName not in self.perTableRtes:
                self.perTableRtes[rteName] = 0
                break
            # limit is not exceeded, so we can allow the name
            if self.perTableRtes[rteName] < getMaxAllowedCountForTable(rteName):
                break
            else:
                self.disallowedTables.add(rteName)

        # increment rte count for the table name
        self.perTableRtes[rteName] += 1
        return " " + rteName + " "
|
||||
|
||||
|
||||
def newQuery():
    """Generate and return one random SQL query built from a fresh context."""
    return _start(GeneratorContext())
|
||||
|
||||
|
||||
def _start(genCtx):
    # Start -> Query ';' || 'WITH' CteList Query ';'
    if not genCtx.canGenerateNewCte() or shouldSelectThatBranch():
        query = _genQuery(genCtx)
    else:
        # produce the WITH-clause first; flag the context so the cte-specific
        # rte limits are enforced while we are inside it
        genCtx.insideCte = True
        query = " WITH " + _genCteList(genCtx)
        genCtx.insideCte = False
        query += _genQuery(genCtx)
    return query + ";"
|
||||
|
||||
|
||||
def _genQuery(genCtx):
    # Query -> SelectExpr FromExpr [OrderBy] [Limit] || 'SELECT' 'avg(avgsub.DistColName)' 'FROM' SubqueryRte 'AS avgsub'
    query = ""
    if (
        getConfig().useAvgAtTopLevelTarget
        and not genCtx.insideCte
        and not genCtx.usedAvg
    ):
        # wrap the whole query once at the top level so it returns a single
        # row (count + avg); makes result comparison across runs practical
        genCtx.usedAvg = True
        query += "SELECT "
        query += "count(*), avg(avgsub." + getConfig().commonColName + ") FROM "
        query += _genSubqueryRte(genCtx)
        query += " AS avgsub"
    else:
        query += _genSelectExpr(genCtx)
        query += _genFromExpr(genCtx)
        choseOrderby = False
        if getConfig().orderby and shouldSelectThatBranch():
            query += _genOrderBy(genCtx)
            choseOrderby = True
        if getConfig().limit and shouldSelectThatBranch():
            # LIMIT without ORDER BY yields nondeterministic rows; force an
            # ORDER BY when the config demands deterministic results
            if not choseOrderby and getConfig().forceOrderbyWithLimit:
                query += _genOrderBy(genCtx)
            query += _genLimit(genCtx)
    return query
|
||||
|
||||
|
||||
def _genOrderBy(genCtx):
    # OrderBy -> 'ORDER BY' DistColName
    return " ORDER BY " + getConfig().commonColName + " "
|
||||
|
||||
|
||||
def _genLimit(genCtx):
    # Limit -> 'LIMIT' 'random()'
    low, high = getConfig().limitRange
    return " LIMIT " + str(random.randint(low, high))
|
||||
|
||||
|
||||
def _genSelectExpr(genCtx):
    # SelectExpr -> 'SELECT' 'curAlias()' '.' DistColName
    return " SELECT " + genCtx.curAlias() + "." + getConfig().commonColName + " "
|
||||
|
||||
|
||||
def _genFromExpr(genCtx):
    # FromExpr -> 'FROM' (Rte JoinList JoinOp Rte Using || RteList)
    #             ['WHERE' 'nextRandomAlias()' '.' DistColName RestrictExpr]
    query = " FROM "
    if shouldSelectThatBranch():
        # explicit join chain: rte JOIN ... JOIN rte USING (col)
        query += _genRte(genCtx)
        query += _genJoinList(genCtx)
        query += randomJoinOp()
        query += _genRte(genCtx)
        query += _genUsing(genCtx)
    else:
        query += _genRteList(genCtx)

    # the last alias produced at this level is the one referenced by WHERE;
    # it must be popped unconditionally to keep the alias stack balanced
    lastAlias = genCtx.removeLastAlias()
    if shouldSelectThatBranch():
        query += " WHERE " + lastAlias + "." + getConfig().commonColName
        query += _genRestrictExpr(genCtx)
    return query
|
||||
|
||||
|
||||
def _genRestrictExpr(genCtx):
    # RestrictExpr -> ('<' || '>' || '=') Int || ['NOT'] 'IN' '(' SubqueryRte ')'
    query = ""
    useComparison = (
        not getConfig().semiAntiJoin
        or not genCtx.canGenerateNewRte()
        or shouldSelectThatBranch()
    )
    if useComparison:
        op = randomRestrictOp()
        low, high = getConfig().filterRange
        query += op + str(random.randint(low, high))
    else:
        # (NOT) IN (subquery) — a semi/anti join
        if shouldSelectThatBranch():
            query += " NOT"
        query += " IN " + _genSubqueryRte(genCtx)
    return query
|
||||
|
||||
|
||||
def _genCteList(genCtx):
    # CteList -> Cte [',' CteList] || Cte
    if shouldSelectThatBranch():
        query = _genCte(genCtx)
        # stop recursing once the configured cte limit is reached
        if not genCtx.canGenerateNewCte():
            return query
        return query + "," + _genCteList(genCtx)
    return _genCte(genCtx)
|
||||
|
||||
|
||||
def _genCte(genCtx):
    # Cte -> 'nextRandomAlias()' 'AS' '(' Query ')'
    query = genCtx.curCteAlias()
    # bump the counter right after naming, so the next cte gets a new alias
    genCtx.currentCteCount += 1
    return query + " AS " + " ( " + _genQuery(genCtx) + " ) "
|
||||
|
||||
|
||||
def _genRteList(genCtx):
    # RteList -> Rte [, RteList] || Rte
    if getConfig().cartesianProduct and shouldSelectThatBranch():
        query = _genRte(genCtx)
        # respect the global rte limit and, inside a cte, the cte rte limit
        if not genCtx.canGenerateNewRte():
            return query
        if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
            return query
        return query + "," + _genRteList(genCtx)
    return _genRte(genCtx)
|
||||
|
||||
|
||||
def _genJoinList(genCtx):
    # JoinList -> JoinOp Rte Using JoinList || e
    if not shouldSelectThatBranch():
        # the empty production
        return ""
    if not genCtx.canGenerateNewRte():
        return ""
    if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
        return ""
    return randomJoinOp() + _genRte(genCtx) + _genUsing(genCtx) + _genJoinList(genCtx)
|
||||
|
||||
|
||||
def _genUsing(genCtx):
    # Using -> 'USING' '(' DistColName ')'
    return " USING (" + getConfig().commonColName + " ) "
|
||||
|
||||
|
||||
def _genRte(genCtx):
    """Generate one range table entry with its alias and record the alias.

    Rte -> SubqueryRte 'AS' 'nextRandomAlias()' || RelationRte 'AS' 'nextRandomAlias()' ||
           CteRte 'AS' 'nextRandomAlias()' || ValuesRte 'AS' 'nextRandomAlias()'

    Raises:
        ValueError: if the randomly chosen rte type is not a known RTEType.
    """
    alias = genCtx.curAlias()
    modifiedAlias = None

    # rte bookkeeping: cte-local and non-cte counters are tracked separately,
    # but the total always advances (it drives alias naming via curAlias)
    if genCtx.insideCte:
        genCtx.currentCteRteCount += 1
    else:
        genCtx.currentRteCount += 1
    genCtx.totalRteCount += 1

    # do not dive into a recursive subquery further if we hit the rte limit;
    # replace it with a relation rte
    rteType = randomRteType()
    if not genCtx.canGenerateNewRte():
        rteType = RTEType.RELATION

    # same replacement when the rte-inside-cte limit is hit
    if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte():
        rteType = RTEType.RELATION

    # we cannot refer to a cte while inside one, nor when none exists yet
    if (genCtx.insideCte or not genCtx.hasAnyCte()) and rteType == RTEType.CTE:
        rteType = RTEType.RELATION

    query = ""
    if rteType == RTEType.SUBQUERY:
        query += _genSubqueryRte(genCtx)
    elif rteType == RTEType.RELATION:
        query += _genRelationRte(genCtx)
    elif rteType == RTEType.CTE:
        query += _genCteRte(genCtx)
    elif rteType == RTEType.VALUES:
        query += _genValuesRte(genCtx)
        # a VALUES rte needs an explicit column alias to be addressable
        modifiedAlias = alias + "(" + getConfig().commonColName + ") "
    else:
        # was `raise BaseException`, which even escapes `except Exception`
        # handlers; ValueError is the appropriate catchable error here
        raise ValueError("unknown RTE type")

    query += " AS "
    query += alias if not modifiedAlias else modifiedAlias
    genCtx.addAlias(alias)

    return query
|
||||
|
||||
|
||||
def _genSubqueryRte(genCtx):
    # SubqueryRte -> '(' Query ')'
    return " ( " + _genQuery(genCtx) + " ) "
|
||||
|
||||
|
||||
def _genRelationRte(genCtx):
    # RelationRte -> 'randomAllowedTableName()'
    return genCtx.getRteNameEnforcingRteLimits()
|
||||
|
||||
|
||||
def _genCteRte(genCtx):
    # CteRte -> 'randomCteName()'
    return genCtx.randomCteName()
|
||||
|
||||
|
||||
def _genValuesRte(genCtx):
    # ValuesRte -> '( VALUES(random()) )'
    low, high = getConfig().dataRange
    return " ( VALUES(" + str(random.randint(low, high)) + " ) ) "
|
|
@ -0,0 +1,39 @@
|
|||
import random
|
||||
|
||||
from node_defs import RestrictOp
|
||||
|
||||
from config.config import getConfig
|
||||
|
||||
|
||||
def shouldSelectThatBranch():
    """Return 0 or 1 uniformly at random, used to pick a grammar branch."""
    return random.getrandbits(1)
|
||||
|
||||
|
||||
def randomRteType():
    """Return a randomly selected RteType from the configured target list."""
    return random.choice(getConfig().targetRteTypes)
|
||||
|
||||
|
||||
def randomJoinOp():
    """Return a randomly selected join operator text, e.g. ' INNER JOIN'."""
    chosen = random.choice(getConfig().targetJoinTypes)
    return " " + chosen.name + " JOIN"
|
||||
|
||||
|
||||
def randomRestrictOp():
    """Return a randomly selected restriction operator given at config.

    Returns:
        The operator padded with single spaces, e.g. " < ".

    Raises:
        ValueError: if the configured RestrictOp value is not recognized.
    """
    opTexts = {
        RestrictOp.LT: "<",
        RestrictOp.GT: ">",
        RestrictOp.EQ: "=",
    }
    restrictOp = random.choice(getConfig().targetRestrictOps)
    try:
        opText = opTexts[restrictOp]
    except KeyError:
        # was `raise BaseException`, which escapes `except Exception` handlers
        # (same class as KeyboardInterrupt); ValueError is the right error
        raise ValueError("Unknown restrict op") from None
    return " " + opText + " "
|
Loading…
Reference in New Issue