diff --git a/.circleci/config.yml b/.circleci/config.yml index 225195d27..1f97c5635 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-v3417e8d' + default: '-vabeb997' pg13_version: type: string default: '13.10' @@ -421,6 +421,63 @@ jobs: - coverage: flags: 'test_<< parameters.pg_major >>,upgrade' + test-query-generator: + description: Expects that the generated queries that are run on distributed and local tables would have the same results + parameters: + pg_major: + description: 'postgres major version' + type: integer + image: + description: 'docker image to use as for the tests' + type: string + default: citus/failtester + image_tag: + description: 'docker image tag to use' + type: string + docker: + - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - install_extension: + pg_major: << parameters.pg_major >> + - configure + - enable_core + - run: + name: 'Run Test' + command: | + gosu circleci make -C src/test/regress check-query-generator + no_output_timeout: 5m + - run: + name: 'Show regressions' + command: | + find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" -exec cat {} + + lines=$(find src/test/regress/citus_tests/query_generator/out/ -name "local_dist.diffs" | wc -l) + if [ $lines -ne 0 ]; then + exit 1 + fi + when: on_fail + - run: + name: 'Copy logfiles' + command: | + mkdir src/test/regress/tmp_citus_test/logfiles + find src/test/regress/tmp_citus_test/ -name "logfile_*" -exec cp -t src/test/regress/tmp_citus_test/logfiles/ {} + + when: on_fail + - store_artifacts: + name: 'Save logfiles' + path: src/test/regress/tmp_citus_test/logfiles + - store_artifacts: + name: 'Save ddls' + path: src/test/regress/citus_tests/query_generator/out/ddls.sql + - store_artifacts: + name: 'Save dmls' + path: src/test/regress/citus_tests/query_generator/out/queries.sql + - stack_trace + - coverage: + flags: 'test_<< parameters.pg_major >>,querygen' + test-citus: description: Runs the common tests of citus parameters: @@ -935,6 +992,24 @@ workflows: image_tag: '<< pipeline.parameters.pg15_version >>' requires: [build-15] + - test-query-generator: + name: 'test-13_check-query-generator' + pg_major: 13 + image_tag: '<< pipeline.parameters.pg13_version >>' + requires: [build-13] + + - test-query-generator: + name: 'test-14_check-query-generator' + pg_major: 14 + image_tag: '<< pipeline.parameters.pg14_version >>' + requires: [build-14] + + - test-query-generator: + name: 'test-15_check-query-generator' + pg_major: 15 + image_tag: '<< pipeline.parameters.pg15_version >>' + requires: [build-15] + - test-pg-upgrade: name: 'test-13-14_check-pg-upgrade' old_pg_major: 13 @@ -975,6 +1050,7 @@ workflows: - test-13_check-enterprise-failure - test-13_check-split - test-13_check-arbitrary-configs + - test-13_check-query-generator - test-14_check-multi - test-14_check-multi-1 - test-14_check-mx @@ -993,6 +1069,7 @@ workflows: - test-14_check-enterprise-failure - test-14_check-split - test-14_check-arbitrary-configs + - test-14_check-query-generator - test-15_check-multi - test-15_check-multi-1 - test-15_check-mx @@ -1011,6 +1088,7 @@ workflows: - test-15_check-enterprise-failure - test-15_check-split - test-15_check-arbitrary-configs + - test-15_check-query-generator - test-13-14_check-pg-upgrade - test-14-15_check-pg-upgrade - test-13_check-citus-upgrade 
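(Note: these CI jobs invoke `gosu circleci make -C src/test/regress check-query-generator`; assuming a built source tree, the same check should also be runnable locally through the `check-query-generator` Makefile target added below.)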
diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index b801f33ff..04c38033d 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -34,6 +34,7 @@ MULTI_REGRESS_OPTS = --inputdir=$(citus_abs_srcdir) $(pg_regress_locale_flags) - pg_upgrade_check = $(citus_abs_srcdir)/citus_tests/upgrade/pg_upgrade_test.py citus_upgrade_check =CITUS_OLD_VERSION=$(citus-old-version) $(citus_abs_srcdir)/citus_tests/upgrade/citus_upgrade_test.py arbitrary_config_check = $(citus_abs_srcdir)/citus_tests/arbitrary_configs/citus_arbitrary_configs.py +query_generator_check = $(citus_abs_srcdir)/citus_tests/query_generator/bin/run_query_compare_test.py template_isolation_files = $(shell find $(citus_abs_srcdir)/spec/ -name '*.spec') generated_isolation_files = $(patsubst $(citus_abs_srcdir)/spec/%,$(citus_abs_srcdir)/build/specs/%,$(template_isolation_files)) @@ -44,7 +45,7 @@ vanilla_diffs_file = $(citus_abs_srcdir)/pg_vanilla_outputs/$(MAJORVERSION)/regr # intermediate, for muscle memory backward compatibility. check: check-full check-enterprise-full # check-full triggers all tests that ought to be run routinely -check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest +check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure check-split check-vanilla check-columnar check-columnar-isolation check-pg-upgrade check-arbitrary-configs check-citus-upgrade check-citus-upgrade-mixed check-citus-upgrade-local check-citus-upgrade-mixed-local check-pytest check-query-generator # check-enterprise-full triggers all enterprise specific tests check-enterprise-full: check-enterprise check-enterprise-isolation check-enterprise-failure check-enterprise-isolation-logicalrep-1 check-enterprise-isolation-logicalrep-2 check-enterprise-isolation-logicalrep-3 @@ -262,6 +263,9 @@ check-arbitrary-base: all check-pytest: pytest -n auto +check-query-generator: all + ${query_generator_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir) + check-citus-upgrade: all $(citus_upgrade_check) \ --bindir=$(bindir) \ diff --git a/src/test/regress/Pipfile b/src/test/regress/Pipfile index 663785a3d..5bce63004 100644 --- a/src/test/regress/Pipfile +++ b/src/test/regress/Pipfile @@ -15,6 +15,7 @@ pytest-asyncio = "*" pytest-timeout = "*" pytest-xdist = "*" pytest-repeat = "*" +pyyaml = "*" [dev-packages] black = "*" diff --git a/src/test/regress/Pipfile.lock b/src/test/regress/Pipfile.lock index 4a86e09a8..ed604c2c0 100644 --- a/src/test/regress/Pipfile.lock +++ b/src/test/regress/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "eb9ca3a7b05e76c7ac60179a1755f89600dfb215e02bf08c258d548df1d96025" + "sha256": "9568b1f3e4d4fd408e5e263f6346b0a4f479ac88e02f64bb79a9d482096e6a03" }, "pipfile-spec": 6, "requires": { @@ -24,14 +24,6 @@ "markers": "python_version >= '3.6'", "version": "==3.4.1" }, - "attrs": { - "hashes": [ - "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", - "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" - ], - "markers": "python_version >= '3.6'", - "version": "==22.2.0" - }, "blinker": { "hashes": [ "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6" @@ -219,32 +211,28 @@ }, 
"cryptography": { "hashes": [ - "sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1", - "sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7", - "sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06", - "sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84", - "sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915", - "sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074", - "sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5", - "sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3", - "sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9", - "sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3", - "sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011", - "sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536", - "sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a", - "sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f", - "sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480", - "sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac", - "sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0", - "sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108", - "sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828", - "sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354", - "sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612", - "sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3", - "sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97" + "sha256:05dc219433b14046c476f6f09d7636b92a1c3e5808b9a6536adf4932b3b2c440", + "sha256:0dcca15d3a19a66e63662dc8d30f8036b07be851a8680eda92d079868f106288", + "sha256:142bae539ef28a1c76794cca7f49729e7c54423f615cfd9b0b1fa90ebe53244b", + "sha256:3daf9b114213f8ba460b829a02896789751626a2a4e7a43a28ee77c04b5e4958", + "sha256:48f388d0d153350f378c7f7b41497a54ff1513c816bcbbcafe5b829e59b9ce5b", + "sha256:4df2af28d7bedc84fe45bd49bc35d710aede676e2a4cb7fc6d103a2adc8afe4d", + "sha256:4f01c9863da784558165f5d4d916093737a75203a5c5286fde60e503e4276c7a", + "sha256:7a38250f433cd41df7fcb763caa3ee9362777fdb4dc642b9a349721d2bf47404", + "sha256:8f79b5ff5ad9d3218afb1e7e20ea74da5f76943ee5edb7f76e56ec5161ec782b", + "sha256:956ba8701b4ffe91ba59665ed170a2ebbdc6fc0e40de5f6059195d9f2b33ca0e", + "sha256:a04386fb7bc85fab9cd51b6308633a3c271e3d0d3eae917eebab2fac6219b6d2", + "sha256:a95f4802d49faa6a674242e25bfeea6fc2acd915b5e5e29ac90a32b1139cae1c", + "sha256:adc0d980fd2760c9e5de537c28935cc32b9353baaf28e0814df417619c6c8c3b", + "sha256:aecbb1592b0188e030cb01f82d12556cf72e218280f621deed7d806afd2113f9", + "sha256:b12794f01d4cacfbd3177b9042198f3af1c856eedd0a98f10f141385c809a14b", + "sha256:c0764e72b36a3dc065c155e5b22f93df465da9c39af65516fe04ed3c68c92636", + "sha256:c33c0d32b8594fa647d2e01dbccc303478e16fdd7cf98652d5b3ed11aa5e5c99", + "sha256:cbaba590180cba88cb99a5f76f90808a624f18b169b90a4abb40c1fd8c19420e", + "sha256:d5a1bd0e9e2031465761dfa920c16b0065ad77321d8a8c1f5ee331021fda65e9" ], "index": "pypi", - "version": "==39.0.2" + "version": "==40.0.2" }, "docopt": { "hashes": [ @@ -271,11 +259,11 @@ }, "filelock": { "hashes": [ - "sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce", - 
"sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182" + "sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9", + "sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718" ], "index": "pypi", - "version": "==3.10.0" + "version": "==3.12.0" }, "flask": { "hashes": [ @@ -488,11 +476,11 @@ }, "packaging": { "hashes": [ - "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", - "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" + "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", + "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f" ], "markers": "python_version >= '3.7'", - "version": "==23.0" + "version": "==23.1" }, "passlib": { "hashes": [ @@ -553,21 +541,11 @@ }, "pyasn1": { "hashes": [ - "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359", - "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576", - "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf", - "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7", - "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", - "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00", - "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8", - "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86", - "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12", - "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776", - "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba", - "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2", - "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3" + "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57", + "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde" ], - "version": "==0.4.8" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==0.5.0" }, "pycparser": { "hashes": [ @@ -578,11 +556,11 @@ }, "pyopenssl": { "hashes": [ - "sha256:c1cc5f86bcacefc84dada7d31175cae1b1518d5f60d3d0bb595a67822a868a6f", - "sha256:df5fc28af899e74e19fccb5510df423581047e10ab6f1f4ba1763ff5fde844c0" + "sha256:841498b9bec61623b1b6c47ebbc02367c07d60e0e195f19790817f10cc8db0b7", + "sha256:9e0c526404a210df9d2b18cd33364beadb0dc858a739b885677bc65e105d4a4c" ], "markers": "python_version >= '3.6'", - "version": "==23.0.0" + "version": "==23.1.1" }, "pyparsing": { "hashes": [ @@ -600,19 +578,19 @@ }, "pytest": { "hashes": [ - "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e", - "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4" + "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362", + "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3" ], "index": "pypi", - "version": "==7.2.2" + "version": "==7.3.1" }, "pytest-asyncio": { "hashes": [ - "sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36", - "sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442" + "sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b", + "sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c" ], "index": "pypi", - "version": "==0.20.3" + "version": "==0.21.0" }, "pytest-repeat": { "hashes": [ @@ -638,6 +616,52 @@ "index": "pypi", 
"version": "==3.2.1" }, + "pyyaml": { + "hashes": [ + "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf", + "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", + "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", + "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", + "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", + "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", + "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", + "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", + "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", + "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", + "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", + "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782", + "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", + "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", + "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", + "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", + "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1", + "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", + "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", + "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", + "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", + "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", + "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d", + "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", + "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7", + "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", + "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", + "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358", + "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", + "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", + "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", + "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f", + "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", + "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" + ], + "index": "pypi", + "version": "==6.0" + }, "ruamel.yaml": { "hashes": [ "sha256:1a771fc92d3823682b7f0893ad56cb5a5c87c48e62b5399d6f42c8759a583b33", @@ -705,20 +729,20 @@ }, "tornado": { "hashes": [ - "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", - "sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72", - 
"sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23", - "sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8", - "sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b", - "sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9", - "sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13", - "sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75", - "sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac", - "sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e", - "sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b" + "sha256:4546003dc8b5733489139d3bff5fa6a0211be505faf819bd9970e7c2b32e8122", + "sha256:4d349846931557b7ec92f224b5d598b160e2ba26ae1812480b42e9622c884bf7", + "sha256:6164571f5b9f73143d1334df4584cb9ac86d20c461e17b6c189a19ead8bb93c1", + "sha256:6cfff1e9c15c79e106b8352269d201f8fc0815914a6260f3893ca18b724ea94b", + "sha256:720f53e6367b38190ae7fa398c25c086c69d88b3c6535bd6021a126b727fb5cd", + "sha256:912df5712024564e362ecce43c8d5862e14c78c8dd3846c9d889d44fbd7f4951", + "sha256:c37b6a384d54ce6a31168d40ab21ad2591ddaf34973075cc0cad154402ecd9e8", + "sha256:c659ab04d5aa477dbe44152c67d93f3ad3243b992d94f795ca1d5c73c37337ce", + "sha256:c9114a61a4588c09065b9996ae05462350d17160b92b9bf9a1e93689cc0424dc", + "sha256:d68f3192936ff2c4add04dc21a436a43b4408d466746b78bb2b9d0a53a18683f", + "sha256:d7b737e18f701de3e4a3b0824260b4d740e4d60607b8089bb80e80ffd464780e" ], - "markers": "python_version >= '3.7'", - "version": "==6.2" + "markers": "python_version >= '3.8'", + "version": "==6.3" }, "typing-extensions": { "hashes": [ @@ -808,42 +832,42 @@ "develop": { "attrs": { "hashes": [ - "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", - "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" + "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04", + "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015" ], - "markers": "python_version >= '3.6'", - "version": "==22.2.0" + "markers": "python_version >= '3.7'", + "version": "==23.1.0" }, "black": { "hashes": [ - "sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd", - "sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555", - "sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481", - "sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468", - "sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9", - "sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a", - "sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958", - "sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580", - "sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26", - "sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32", - "sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8", - "sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753", - "sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b", - "sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074", - "sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651", - "sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24", - "sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6", - 
"sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad", - "sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac", - "sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221", - "sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06", - "sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27", - "sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648", - "sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739", - "sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104" + "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5", + "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915", + "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326", + "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940", + "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b", + "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30", + "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c", + "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c", + "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab", + "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27", + "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2", + "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961", + "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9", + "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb", + "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70", + "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331", + "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2", + "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266", + "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d", + "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6", + "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b", + "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925", + "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8", + "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4", + "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3" ], "index": "pypi", - "version": "==23.1.0" + "version": "==23.3.0" }, "click": { "hashes": [ @@ -863,11 +887,11 @@ }, "flake8-bugbear": { "hashes": [ - "sha256:beb5c7efcd7ccc2039ef66a77bb8db925e7be3531ff1cb4d0b7030d0e2113d72", - "sha256:e3e7f74c8a49ad3794a7183353026dabd68c74030d5f46571f84c1fb0eb79363" + "sha256:8a218d13abd6904800970381057ce8e72cde8eea743240c4ef3ede4dd0bc9cfb", + "sha256:ea565bdb87b96b56dc499edd6cc3ba7f695373d902a5f56c989b74fad7c7719d" ], "index": "pypi", - "version": "==23.3.12" + "version": "==23.3.23" }, "isort": { "hashes": [ @@ -895,11 +919,11 @@ }, "packaging": { "hashes": [ - "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", - "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" + "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", + "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f" ], "markers": "python_version >= '3.7'", - "version": "==23.0" + "version": "==23.1" }, "pathspec": { 
"hashes": [ @@ -911,11 +935,11 @@ }, "platformdirs": { "hashes": [ - "sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa", - "sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8" + "sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08", + "sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e" ], "markers": "python_version >= '3.7'", - "version": "==3.1.1" + "version": "==3.2.0" }, "pycodestyle": { "hashes": [ diff --git a/src/test/regress/citus_tests/config.py b/src/test/regress/citus_tests/config.py index c5b7ce010..3dc47671b 100644 --- a/src/test/regress/citus_tests/config.py +++ b/src/test/regress/citus_tests/config.py @@ -15,6 +15,8 @@ WORKER2 = "worker2" REGULAR_USER_NAME = "regularuser" SUPER_USER_NAME = "postgres" +DATABASE_NAME = "postgres" + ARBITRARY_SCHEDULE_NAMES = [ "create_schedule", "sql_schedule", @@ -96,6 +98,7 @@ class CitusBaseClusterConfig(object, metaclass=NewInitCaller): self.temp_dir = CITUS_ARBITRARY_TEST_DIR self.worker_amount = 2 self.user = REGULAR_USER_NAME + self.dbname = DATABASE_NAME self.is_mx = True self.is_citus = True self.name = type(self).__name__ diff --git a/src/test/regress/citus_tests/query_generator/.gitignore b/src/test/regress/citus_tests/query_generator/.gitignore new file mode 100644 index 000000000..c397a501d --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +*-env diff --git a/src/test/regress/citus_tests/query_generator/README.md b/src/test/regress/citus_tests/query_generator/README.md new file mode 100644 index 000000000..cf4d54b0e --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/README.md @@ -0,0 +1,223 @@ +## Goal of the Tool +Tool supports a simple syntax to be useful to generate queries with different join orders. Main motivation for me to create the tool was to compare results of the generated queries for different [Citus](https://github.com/citusdata/citus) tables and Postgres tables. That is why we support a basic syntax for now. It can be extended to support different queries. + +## Query Generator for Postgres +Tool generates SELECT queries, whose depth can be configured, with different join orders. It also generates DDLs required for query execution. +You can also tweak configuration parameters for data inserting command generation. + +## How to Run Citus Join Verification? +You can verify if Citus breaks any default PG join behaviour via `citus_compare_dist_local_joins.sh`. It creates +tables specified in config. Then, it runs generated queries on those tables and saves the results into `out/dist_queries.out`. +After running those queries for Citus tables, it creates PG tables with the same names as previous run, executes the same +queries, and saves the results into `out/local_queries.out`. In final step, it generates diff between local and distributed results. +You can see the contents of `out/local_dist_diffs` to see if there is any Citus unsupported query. + +1. Create a Citus local cluster with 2 workers by using [citus_dev](https://github.com/citusdata/tools/tree/develop/citus_dev) tool +(Note: make sure you do not configure psql via .psqlrc file as it would fail the test.) +```bash +citus_dev make testCluster --destroy +``` +2. Run the test, +```bash +cd src/test/regress/citus_tests/query_generator/bin +bash citus_compare_dist_local_joins.sh +``` +3. 
See the diff content in `src/test/regress/citus_tests/query_generator/out/local_dist_diffs`  +### Configuration +You can configure 3 different parts: + +- [DDL Configuration](#ddl-configuration) +- [Data Insertion Configuration](#data-insertion-configuration) +- [Query Configuration](#query-configuration) + +## DDL Configuration +The tool generates the related DDL commands before generating the queries. + +Schema for DDL configuration: +```yaml +ddlOutFile: +commonColName: +targetTables: + - Table: + name: + citusType: + maxAllowedUseOnQuery: + rowCount: + nullRate: + duplicateRate: + columns: + - Column: + name: + type: + distinctCopyCount: +``` + +Explanation: +```yaml +ddlOutFile: "file to write the generated DDL commands" +commonColName: "name of the column that will be used as the distribution column, the filter column in restrictions, and the target column in selections" +targetTables: "array of tables that will be used in generated queries" + - Table: + name: "name prefix of the table" + citusType: "Citus type of the table" + maxAllowedUseOnQuery: "limits how many times the table can appear in a query" + rowCount: "total # of rows that will be inserted into the table" + nullRate: "percentage of rowCount that will be inserted into the table as null rows" + duplicateRate: "percentage of rowCount that will be inserted into the table as duplicates" + columns: "array of columns in the table" + - Column: + name: "name of the column" + type: "data type of the column (only 'int' is supported for now)" + distinctCopyCount: "how many copies of the table with the same configuration should be created (only the full name changes; the same name prefix is kept)" +``` + + +## Data Insertion Configuration +The tool generates data insertion commands if you want the tables filled with data. You can configure the total number of rows, what percentage of them should +be null, and what percentage should be duplicated. For the related configuration, see the Table schema at [DDL Configuration](#ddl-configuration). You +can also configure the range of the randomly generated data. See `dataRange` at [Query Configuration](#query-configuration). + +## Query Configuration +After generating the DDL and data insertion commands, the tool generates queries. 
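+
+As an illustration only (not actual tool output), a generated query might look like the following; the table names (`dist0`, `ref0`), aliases, and constants are hypothetical examples consistent with the default config:
+```sql
+-- hypothetical example of a generated query
+SELECT avg(avgsub.id) FROM (
+    SELECT table_0.id
+    FROM dist0 AS table_0
+    INNER JOIN ref0 AS table_1 USING (id)
+    WHERE table_0.id < 5
+    ORDER BY id
+    LIMIT 3
+) AS avgsub;
+```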
+ +Schema for Query configuration: +```yaml +queryCount: +queryOutFile: +semiAntiJoin: +cartesianProduct: +limit: +orderby: +forceOrderbyWithLimit: +useAvgAtTopLevelTarget: +dataRange: + from: + to: +filterRange: + from: + to: +limitRange: + from: + to: +targetRteCount: +targetCteCount: +targetCteRteCount: +targetJoinTypes: +targetRteTypes: +targetRestrictOps: +``` + +Explanation: +```yaml +queryCount: "number of queries to generate" +queryOutFile: "file to write the generated queries" +semiAntiJoin: "should we support semi joins (WHERE col IN (Subquery))" +cartesianProduct: "should we support cartesian joins" +limit: "should we support the limit clause" +orderby: "should we support the order by clause" +forceOrderbyWithLimit: "should we force order by when we use limit" +useAvgAtTopLevelTarget: "should we make the top level query select avg() from (subquery)" +dataRange: + from: "starting boundary for data generation" + to: "end boundary for data generation" +filterRange: + from: "starting boundary for the restriction clause" + to: "end boundary for the restriction clause" +limitRange: + from: "starting boundary for the limit clause" + to: "end boundary for the limit clause" +targetRteCount: "limits how many RTEs should exist in the non-CTE part of the query" +targetCteCount: "limits how many CTEs should exist in the query" +targetCteRteCount: "limits how many RTEs should exist in the CTE part of the query" +targetJoinTypes: "supported join types" +targetRteTypes: "supported RTE types" +targetRestrictOps: "supported restrict operations" +``` + +## Misc Configuration +The tool has some configuration options that do not fit into the above 3 parts. + +Schema for misc configuration: +```yaml +interactiveMode: +``` + +Explanation: +```yaml +interactiveMode: "when true, interactively prints the generated DDLs and queries. Otherwise, it writes them to the configured files." +``` + +## Supported Operations +The tool uses `commonColName` for any kind of target selection required by any supported query clause. + +### Column Type Support +The tool currently supports only the int data type; support for other basic types is planned. + +### Join Support +The tool supports the following joins: +```yaml +targetJoinTypes: + - INNER + - LEFT + - RIGHT + - FULL +``` + +### Citus Table Support +The tool supports the following Citus table types: +```yaml +targetTables: + - Table: + ... + citusType: + ... +``` + +### Restrict Operation Support +The tool supports the following restrict operations: +```yaml +targetRestrictOps: + - LT + - GT + - EQ +``` + +### Rte Support +The tool supports the following RTEs: +```yaml +targetRteTypes: + - RELATION + - SUBQUERY + - CTE + - VALUES +``` + +## How to Generate Queries? +You have 2 different options. + +- [Interactive Mode](#interactive-mode) +- [File Mode](#file-mode) + +### Interactive Mode +In this mode, you will be prompted to continue generating queries. When you press `Enter`, it will generate another one. +Press `x` to exit. + +1. Configure `interactiveMode: true` in config.yaml, +2. Run the command shown below +```bash +cd src/test/regress/citus_tests/query_generator +python generate_queries.py +``` + +### File Mode +In this mode, the generated DDLs and queries are written into the files configured in [config.yaml](./config/config.yaml). + +1. Configure `interactiveMode: false`, +2. Configure `queryCount: `, +3. Configure `queryOutFile: ` and `ddlOutFile: ` +4. 
Run the command shown below +```bash +cd src/test/regress/citus_tests/query_generator +python generate_queries.py +``` diff --git a/src/test/regress/citus_tests/query_generator/bin/citus_compare_dist_local_joins.sh b/src/test/regress/citus_tests/query_generator/bin/citus_compare_dist_local_joins.sh new file mode 100644 index 000000000..f921bec0b --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/bin/citus_compare_dist_local_joins.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# make bash behave +set -euo pipefail + +psql_user=$1 +psql_db=$2 +psql_port=$3 + +runDDLs() +{ + # run ddls + psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" -f "${out_folder}"/ddls.sql > /dev/null +} + +runUndistributeTables() +{ + undistribute_all_tables_command='SELECT undistribute_table(logicalrelid) FROM pg_dist_partition;' + # run undistribute all tables + psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" -c "${undistribute_all_tables_command}" > /dev/null +} + +runQueries() +{ + out_filename=$1 + + # run dmls + # echo queries and comments for query tracing + psql -U "${psql_user}" -d "${psql_db}" -p "${psql_port}" \ + --echo-all \ + -f "${out_folder}"/queries.sql > "${out_filename}" 2>&1 +} + +showDiffs() +{ + pushd . && cd "${script_folder}" && python3 diff-checker.py && popd +} + +# run query generator and let it create output ddls and queries +script_folder=$(dirname "$0") +out_folder="${script_folder}"/../out +pushd . && cd "${script_folder}"/.. && python3 generate_queries.py && popd + +# remove result files if exists +rm -rf "${out_folder}"/dist_queries.out "${out_folder}"/local_queries.out + +# run ddls +runDDLs + +# runs dmls for distributed tables +runQueries "${out_folder}"/dist_queries.out + +# undistribute all dist tables +runUndistributeTables + +# runs the same dmls for pg local tables +runQueries "${out_folder}"/local_queries.out + +# see diffs in results +showDiffs diff --git a/src/test/regress/citus_tests/query_generator/bin/diff-checker.py b/src/test/regress/citus_tests/query_generator/bin/diff-checker.py new file mode 100644 index 000000000..5bd2898a9 --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/bin/diff-checker.py @@ -0,0 +1,122 @@ +import os +import re +import subprocess +import sys + + +def createPatternForFailedQueryBlock(acceptableErrors): + totalAcceptableOrders = len(acceptableErrors) + + failedQueryBlockPattern = "-- queryId: [0-9]+\n.*\npsql:.*(?:" + for acceptableErrorIdx in range(totalAcceptableOrders): + failedQueryBlockPattern += acceptableErrors[acceptableErrorIdx] + if acceptableErrorIdx < totalAcceptableOrders - 1: + failedQueryBlockPattern += "|" + failedQueryBlockPattern += ")" + + return failedQueryBlockPattern + + +def findFailedQueriesFromFile(queryOutFile, acceptableErrors): + failedQueryIds = [] + outFileContent = "" + failedQueryBlockPattern = createPatternForFailedQueryBlock(acceptableErrors) + queryIdPattern = "queryId: ([0-9]+)" + with open(queryOutFile, "r") as f: + outFileContent = f.read() + failedQueryContents = re.findall(failedQueryBlockPattern, outFileContent) + failedQueryIds = [ + re.search(queryIdPattern, failedQueryContent)[1] + for failedQueryContent in failedQueryContents + ] + + return failedQueryIds + + +def removeFailedQueryOutputFromFile(outFile, failedQueryIds): + if len(failedQueryIds) == 0: + return + + distOutFileContentAsLines = [] + with open(outFile, "r") as f: + distOutFileContentAsLines = f.readlines() + + with open(outFile, "w") as f: + clear = False + nextIdx = 0 + nextQueryIdToDelete = 
failedQueryIds[nextIdx] + queryIdPattern = "queryId: ([0-9]+)" + + for line in distOutFileContentAsLines: + matched = re.search(queryIdPattern, line) + # found a line which contains a query id + if matched: + # the query id matches the next failed query's id + if nextQueryIdToDelete == matched[1]: + # clear lines until we find a successful query + clear = True + nextIdx += 1 + if nextIdx < len(failedQueryIds): + nextQueryIdToDelete = failedQueryIds[nextIdx] + else: + # we found a successful query + clear = False + + if not clear: + f.write(line) + return + + +def removeFailedQueryOutputFromFiles(distQueryOutFile, localQueryOutFile): + # remove the failed distributed queries from both the local and distributed query files to prevent diffs for acceptable errors + # some of the generated queries fail with the errors below due to https://github.com/citusdata/citus/issues/6653; we skip those until we support them + acceptableErrors = [ + "ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns", + "ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns", + ] + failedDistQueryIds = findFailedQueriesFromFile(distQueryOutFile, acceptableErrors) + removeFailedQueryOutputFromFile(distQueryOutFile, failedDistQueryIds) + removeFailedQueryOutputFromFile(localQueryOutFile, failedDistQueryIds) + return + + +def showDiffs(distQueryOutFile, localQueryOutFile, diffFile): + exitCode = 1 + with open(diffFile, "w") as f: + diffCommand = "diff -u {} {}".format(localQueryOutFile, distQueryOutFile) + process = subprocess.Popen(diffCommand.split(), stdout=f, stderr=f, shell=False) + process.wait() + exitCode = process.returncode + + print("diff exit {}".format(exitCode)) + return exitCode + + +def exitIfAnyLocalQueryFailed(localQueryOutFile): + allErrors = ["ERROR:"] + failedLocalQueryIds = findFailedQueriesFromFile(localQueryOutFile, allErrors) + assert ( + len(failedLocalQueryIds) == 0 + ), """There might be an internal error related to the query generator, or + we might have found a Postgres bug. 
Check local_queries.out to see the error.""" + return + + +if __name__ == "__main__": + scriptDirPath = os.path.dirname(os.path.abspath(__file__)) + outFolderPath = scriptDirPath + "/../out" + localQueryOutFile = "{}/local_queries.out".format(outFolderPath) + distQueryOutFile = "{}/dist_queries.out".format(outFolderPath) + diffFile = "{}/local_dist.diffs".format(outFolderPath) + + # exit if we have any error from local queries + exitIfAnyLocalQueryFailed(localQueryOutFile) + + # find failed queries from distQueryOutFile and then remove failed queries and their results from + # both distQueryOutFile and localQueryOutFile + removeFailedQueryOutputFromFiles(distQueryOutFile, localQueryOutFile) + + # show diffs in unified format + exitCode = showDiffs(distQueryOutFile, localQueryOutFile, diffFile) + + sys.exit(exitCode) diff --git a/src/test/regress/citus_tests/query_generator/bin/run_query_compare_test.py b/src/test/regress/citus_tests/query_generator/bin/run_query_compare_test.py new file mode 100755 index 000000000..9ab172ef7 --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/bin/run_query_compare_test.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +"""query_gen_test +Usage: + run_query_compare_test --bindir= --pgxsdir= + +Options: + --bindir= PostgreSQL executable directory(ex: '~/.pgenv/pgsql-10.4/bin') + --pgxsdir= Path to the PGXS directory(ex: ~/.pgenv/src/postgresql-11.3) +""" + +import os +import subprocess +import sys + +from docopt import docopt + +# https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time/14132912#14132912 +sys.path.append( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +) + + +# ignore E402 because these imports require addition to path +import common # noqa: E402 + +import config as cfg # noqa: E402 + + +def run_test(config): + # start cluster + common.initialize_temp_dir(cfg.CITUS_ARBITRARY_TEST_DIR) + common.initialize_citus_cluster( + config.bindir, config.datadir, config.settings, config + ) + + # run test + scriptDirPath = os.path.dirname(os.path.abspath(__file__)) + testRunCommand = "bash {}/citus_compare_dist_local_joins.sh {} {} {}".format( + scriptDirPath, config.user, config.dbname, config.coordinator_port() + ) + process = subprocess.Popen( + testRunCommand.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = process.communicate() + + # stop cluster + common.stop_databases( + config.bindir, config.datadir, config.node_name_to_ports, config.name + ) + + print(stdout) + print(stderr) + print(process.returncode) + sys.exit(process.returncode) + + +if __name__ == "__main__": + citusClusterConfig = cfg.CitusSuperUserDefaultClusterConfig( + docopt(__doc__, version="run_query_compare_test") + ) + + run_test(citusClusterConfig) diff --git a/src/test/regress/citus_tests/query_generator/config/config.py b/src/test/regress/citus_tests/query_generator/config/config.py new file mode 100644 index 000000000..85def6b79 --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/config/config.py @@ -0,0 +1,129 @@ +import copy + +import yaml +from config.config_parser import ( + parseJoinTypeArray, + parseRange, + parseRestrictOpArray, + parseRteTypeArray, + parseTableArray, +) +from node_defs import CitusType + + +class Config: + def __init__(self): + configObj = Config.parseConfigFile("config/config.yaml") + + self.targetTables = _distinctCopyTables( + parseTableArray(configObj["targetTables"]) + ) + self.targetJoinTypes = 
parseJoinTypeArray(configObj["targetJoinTypes"]) + self.targetRteTypes = parseRteTypeArray(configObj["targetRteTypes"]) + self.targetRestrictOps = parseRestrictOpArray(configObj["targetRestrictOps"]) + self.commonColName = configObj["commonColName"] + self.targetRteCount = configObj["targetRteCount"] + self.targetCteCount = configObj["targetCteCount"] + self.targetCteRteCount = configObj["targetCteRteCount"] + self.semiAntiJoin = configObj["semiAntiJoin"] + self.cartesianProduct = configObj["cartesianProduct"] + self.limit = configObj["limit"] + self.orderby = configObj["orderby"] + self.forceOrderbyWithLimit = configObj["forceOrderbyWithLimit"] + self.useAvgAtTopLevelTarget = configObj["useAvgAtTopLevelTarget"] + self.interactiveMode = configObj["interactiveMode"] + self.queryOutFile = configObj["queryOutFile"] + self.ddlOutFile = configObj["ddlOutFile"] + self.queryCount = configObj["queryCount"] + self.dataRange = parseRange(configObj["dataRange"]) + self.filterRange = parseRange(configObj["filterRange"]) + self.limitRange = parseRange(configObj["limitRange"]) + self._ensureRteLimitsAreSane() + # print(self) + + def __repr__(self): + rep = "targetRteCount: {}\n".format(self.targetRteCount) + rep += "targetCteCount: {}\n".format(self.targetCteCount) + rep += "targetCteRteCount: {}\n".format(self.targetCteRteCount) + + rep += "targetRteTypes:\n" + for rteType in self.targetRteTypes: + rep += "\t{}\n".format(rteType) + + rep += "targetJoinTypes:\n" + for joinType in self.targetJoinTypes: + rep += "\t{}\n".format(joinType) + + rep += "restrictOps:\n" + for restrictOp in self.targetRestrictOps: + rep += "\t{}\n".format(restrictOp) + + return rep + + def _ensureRteLimitsAreSane(self): + totalAllowedUseForAllTables = 0 + for table in self.targetTables: + totalAllowedUseForAllTables += table.maxAllowedUseOnQuery + assert ( + totalAllowedUseForAllTables >= self.targetRteCount + ), """targetRteCount cannot be greater than sum of maxAllowedUseOnQuery for all tables. 
+ Set targetRteCount to a lower value or increase maxAllowedUseOnQuery for tables at config.yaml.""" + + @staticmethod + def parseConfigFile(path): + try: + with open(path, "r") as configFile: + return yaml.load(configFile, yaml.Loader) + except IOError as e: + print("I/O error({0}): {1}".format(e.errno, e.strerror)) + raise BaseException("cannot parse config.yaml") + except Exception: + print("Unexpected error while parsing config.yaml.") + + +_config = None + + +def resetConfig(): + global _config + _config = Config() + + +def getConfig(): + return _config + + +def getAllTableNames(): + """returns the names of the target tables given in the config""" + tables = getConfig().targetTables + tableNames = [table.name for table in tables] + return tableNames + + +def getMaxAllowedCountForTable(tableName): + tables = getConfig().targetTables + filtered = filter(lambda el: el.name == tableName, tables) + filtered = list(filtered) + assert len(filtered) == 1 + return filtered[0].maxAllowedUseOnQuery + + +def isTableDistributed(table): + return table.citusType == CitusType.DISTRIBUTED + + +def isTableReference(table): + return table.citusType == CitusType.REFERENCE + + +def _distinctCopyTables(tables): + distinctCopyTables = [] + for table in tables: + distinctCopyCount = table.distinctCopyCount + for tblIdx in range(1, distinctCopyCount): + distinctCopyTable = copy.deepcopy(table) + distinctCopyTable.name += str(tblIdx) + distinctCopyTables.append(distinctCopyTable) + table.name += "0" + tables.extend(distinctCopyTables) + return tables
diff --git a/src/test/regress/citus_tests/query_generator/config/config.yaml b/src/test/regress/citus_tests/query_generator/config/config.yaml new file mode 100644 index 000000000..1920966ee --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/config/config.yaml @@ -0,0 +1,65 @@ +interactiveMode: false +queryCount: 250 +queryOutFile: queries.sql +ddlOutFile: ddls.sql +semiAntiJoin: true +cartesianProduct: false +limit: true +orderby: true +forceOrderbyWithLimit: true +useAvgAtTopLevelTarget: true +dataRange: + from: 0 + to: 10 +filterRange: + from: 0 + to: 10 +limitRange: + from: 0 + to: 10 +targetRteCount: 40 +targetCteCount: 3 +targetCteRteCount: 2 + +commonColName: id + +targetTables: + - Table: + name: dist + citusType: DISTRIBUTED + maxAllowedUseOnQuery: 10 + rowCount: 10 + nullRate: 0.1 + duplicateRate: 0.1 + columns: + - Column: + name: id + type: int + distinctCopyCount: 2 + - Table: + name: ref + citusType: REFERENCE + maxAllowedUseOnQuery: 10 + rowCount: 10 + nullRate: 0.1 + duplicateRate: 0.1 + columns: + - Column: + name: id + type: int + distinctCopyCount: 2 + +targetJoinTypes: + - INNER + - LEFT + - RIGHT + - FULL + +targetRteTypes: + - RELATION + - SUBQUERY + - CTE + +targetRestrictOps: + - LT + - GT
diff --git a/src/test/regress/citus_tests/query_generator/config/config_parser.py b/src/test/regress/citus_tests/query_generator/config/config_parser.py new file mode 100755 index 000000000..0b4f3837f --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/config/config_parser.py @@ -0,0 +1,81 @@ +from node_defs import CitusType, Column, JoinType, RestrictOp, RTEType, Table + + +def parseJoinType(joinTypeText): + return JoinType[joinTypeText] + + +def parseJoinTypeArray(joinTypeTexts): + joinTypes = [] + for joinTypeText in joinTypeTexts: + joinType = parseJoinType(joinTypeText) + joinTypes.append(joinType) + return joinTypes + + +def parseRteType(rteTypeText): + return RTEType[rteTypeText] + + +def parseRteTypeArray(rteTypeTexts): 
rteTypes = [] + for rteTypeText in rteTypeTexts: + rteType = parseRteType(rteTypeText) + rteTypes.append(rteType) + return rteTypes + + +def parseRestrictOp(restrictOpText): + return RestrictOp[restrictOpText] + + +def parseRestrictOpArray(restrictOpTexts): + restrictOps = [] + for restrictOpText in restrictOpTexts: + restrictOp = parseRestrictOp(restrictOpText) + restrictOps.append(restrictOp) + return restrictOps + + +def parseTable(targetTableDict): + name = targetTableDict["name"] + citusType = CitusType[targetTableDict["citusType"]] + maxAllowedUseOnQuery = targetTableDict["maxAllowedUseOnQuery"] + rowCount = targetTableDict["rowCount"] + nullRate = targetTableDict["nullRate"] + duplicateRate = targetTableDict["duplicateRate"] + columns = [] + for columnDict in targetTableDict["columns"]: + col = parseColumn(columnDict) + columns.append(col) + distinctCopyCount = targetTableDict["distinctCopyCount"] + return Table( + name, + citusType, + maxAllowedUseOnQuery, + rowCount, + nullRate, + duplicateRate, + columns, + distinctCopyCount, + ) + + +def parseTableArray(targetTableDicts): + tables = [] + for targetTableDict in targetTableDicts: + table = parseTable(targetTableDict["Table"]) + tables.append(table) + return tables + + +def parseColumn(targetColumnDict): + name = targetColumnDict["name"] + type = targetColumnDict["type"] + return Column(name, type) + + +def parseRange(rangeDict): + fromVal = rangeDict["from"] + toVal = rangeDict["to"] + return (fromVal, toVal) diff --git a/src/test/regress/citus_tests/query_generator/data_gen.py b/src/test/regress/citus_tests/query_generator/data_gen.py new file mode 100644 index 000000000..1d508acaf --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/data_gen.py @@ -0,0 +1,81 @@ +from node_defs import CitusType + +from config.config import getConfig + + +def getTableData(): + dataGenerationSql = "" + + tableIdx = 1 + (fromVal, toVal) = getConfig().dataRange + tables = getConfig().targetTables + for table in tables: + # generate base rows + dataGenerationSql += _genOverlappingData(table.name, fromVal, table.rowCount) + dataGenerationSql += _genNonOverlappingData(table.name, toVal, tableIdx) + dataGenerationSql += "\n" + + # generate null rows + if not table.citusType == CitusType.DISTRIBUTED: + targetNullRows = int(table.rowCount * table.nullRate) + dataGenerationSql += _genNullData(table.name, targetNullRows) + dataGenerationSql += "\n" + + # generate duplicate rows + targetDuplicateRows = int(table.rowCount * table.duplicateRate) + dataGenerationSql += _genDupData(table.name, targetDuplicateRows) + dataGenerationSql += "\n\n" + tableIdx += 1 + return dataGenerationSql + + +def _genOverlappingData(tableName, startVal, rowCount): + """returns string to fill table with [startVal,startVal+rowCount] range of integers""" + dataGenerationSql = "" + dataGenerationSql += "INSERT INTO " + tableName + dataGenerationSql += ( + " SELECT i FROM generate_series(" + + str(startVal) + + "," + + str(startVal + rowCount) + + ") i;" + ) + return dataGenerationSql + + +def _genNullData(tableName, nullCount): + """returns string to fill table with NULLs""" + dataGenerationSql = "" + dataGenerationSql += "INSERT INTO " + tableName + dataGenerationSql += ( + " SELECT NULL FROM generate_series(0," + str(nullCount) + ") i;" + ) + return dataGenerationSql + + +def _genDupData(tableName, dupRowCount): + """returns string to fill table with duplicate integers which are fetched from given table""" + dataGenerationSql = "" + dataGenerationSql += "INSERT INTO " + 
tableName + dataGenerationSql += ( + " SELECT * FROM " + + tableName + + " ORDER BY " + + getConfig().commonColName + + " LIMIT " + + str(dupRowCount) + + ";" + ) + return dataGenerationSql + + +def _genNonOverlappingData(tableName, startVal, tableIdx): + """returns string to fill table with different integers for given table""" + startVal = startVal + tableIdx * 20 + endVal = startVal + 20 + dataGenerationSql = "" + dataGenerationSql += "INSERT INTO " + tableName + dataGenerationSql += ( + " SELECT i FROM generate_series(" + str(startVal) + "," + str(endVal) + ") i;" + ) + return dataGenerationSql diff --git a/src/test/regress/citus_tests/query_generator/ddl_gen.py b/src/test/regress/citus_tests/query_generator/ddl_gen.py new file mode 100755 index 000000000..b2f97f694 --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/ddl_gen.py @@ -0,0 +1,48 @@ +from config.config import getConfig, isTableDistributed, isTableReference + + +def getTableDDLs(): + ddls = "" + tables = getConfig().targetTables + for table in tables: + ddls += _genTableDDL(table) + ddls += "\n" + return ddls + + +def _genTableDDL(table): + ddl = "" + ddl += "DROP TABLE IF EXISTS " + table.name + ";" + ddl += "\n" + + ddl += "CREATE TABLE " + table.name + "(" + for column in table.columns[:-1]: + ddl += _genColumnDDL(column) + ddl += ",\n" + if len(table.columns) > 0: + ddl += _genColumnDDL(table.columns[-1]) + ddl += ");\n" + + if isTableDistributed(table): + ddl += ( + "SELECT create_distributed_table(" + + "'" + + table.name + + "','" + + getConfig().commonColName + + "'" + + ");" + ) + ddl += "\n" + elif isTableReference(table): + ddl += "SELECT create_reference_table(" + "'" + table.name + "'" + ");" + ddl += "\n" + return ddl + + +def _genColumnDDL(column): + ddl = "" + ddl += column.name + ddl += " " + ddl += column.type + return ddl diff --git a/src/test/regress/citus_tests/query_generator/generate_queries.py b/src/test/regress/citus_tests/query_generator/generate_queries.py new file mode 100755 index 000000000..4f1f7967b --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/generate_queries.py @@ -0,0 +1,65 @@ +import signal +import sys + +from data_gen import getTableData +from ddl_gen import getTableDDLs +from query_gen import newQuery + +from config.config import getConfig, resetConfig + + +def _signal_handler(sig, frame): + sys.exit(0) + + +def _interactiveMode(ddls, data): + print(ddls) + print(data) + + while True: + res = input("Press x to exit or Enter to generate more") + if res.lower() == "x": + print("Exit from query generation mode!") + sys.exit(0) + + query = newQuery() + print(query) + + resetConfig() + + +def _fileMode(ddls, data): + ddlFileName = "out/" + getConfig().ddlOutFile + with open(ddlFileName, "w") as ddlFile: + ddlFile.writelines([ddls, data]) + + queryCount = getConfig().queryCount + fileName = "out/" + getConfig().queryOutFile + with open(fileName, "w") as f: + # enable repartition joins due to https://github.com/citusdata/citus/issues/6865 + enableRepartitionJoinCommand = "SET citus.enable_repartition_joins TO on;\n" + queryLines = [enableRepartitionJoinCommand] + queryId = 1 + for _ in range(queryCount): + query = newQuery() + + queryLine = "-- queryId: " + str(queryId) + "\n" + queryLine += query + "\n\n" + + queryLines.append(queryLine) + queryId += 1 + f.writelines(queryLines) + + +if __name__ == "__main__": + signal.signal(signal.SIGINT, _signal_handler) + + resetConfig() + + ddls = getTableDDLs() + data = getTableData() + + if getConfig().interactiveMode: + 
_interactiveMode(ddls, data) + else: + _fileMode(ddls, data)
diff --git a/src/test/regress/citus_tests/query_generator/node_defs.py b/src/test/regress/citus_tests/query_generator/node_defs.py new file mode 100755 index 000000000..b0db1da63 --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/node_defs.py @@ -0,0 +1,55 @@ +from enum import Enum + + +class JoinType(Enum): + INNER = 1 + LEFT = 2 + RIGHT = 3 + FULL = 4 + + +class RTEType(Enum): + RELATION = 1 + SUBQUERY = 2 + CTE = 3 + VALUES = 4 + + +class RestrictOp(Enum): + LT = 1 + GT = 2 + EQ = 3 + + +class CitusType(Enum): + DISTRIBUTED = 1 + REFERENCE = 2 + POSTGRES = 3 + + +class Table: + def __init__( + self, + name, + citusType, + maxAllowedUseOnQuery, + rowCount, + nullRate, + duplicateRate, + columns, + distinctCopyCount, + ): + self.name = name + self.citusType = citusType + self.maxAllowedUseOnQuery = maxAllowedUseOnQuery + self.rowCount = rowCount + self.nullRate = nullRate + self.duplicateRate = duplicateRate + self.columns = columns + self.distinctCopyCount = distinctCopyCount + + +class Column: + def __init__(self, name, type): + self.name = name + self.type = type
diff --git a/src/test/regress/citus_tests/query_generator/out/.gitignore b/src/test/regress/citus_tests/query_generator/out/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/out/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore
diff --git a/src/test/regress/citus_tests/query_generator/query_gen.py b/src/test/regress/citus_tests/query_generator/query_gen.py new file mode 100644 index 000000000..e25525d29 --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/query_gen.py @@ -0,0 +1,434 @@ +import random + +from node_defs import RTEType +from random_selections import ( + randomJoinOp, + randomRestrictOp, + randomRteType, + shouldSelectThatBranch, +) + +from config.config import getAllTableNames, getConfig, getMaxAllowedCountForTable + +# query_gen.py generates a new query from the grammar rules below. It randomly chooses allowed rules +# to generate a query. Here are some important notes about the query generation: +# +# - We enforce the max allowed # of usages for each table. We also enforce a global total # of tables. +# - Table names, restriction types and any other selections are chosen from the values provided by the +# configuration file (config.yaml). +# - The entry point for the generator is newQuery; all other methods are internal. We pass a context +# object named GeneratorContext to all internal methods as a parameter to perform checks and generations +# on the query via the context. +# - shouldSelectThatBranch() is a useful utility method to randomly choose a grammar rule. Some of the rules +# are only selected if they are allowed in the configuration file. +# - We enforce table limits separately if we are inside the cte part of the query (see targetCteRteCount). +# We also enforce a max # of ctes per query. +# - commonColName from the config is used in select and where clauses. +# - useAvgAtTopLevelTarget is useful to return a single row as the query result. It is also useful to track Citus +# query bugs via run_query_compare_test.py. +# - The '=' restriction is removed from the config by default so that queries return values different from null most of the time. +# - 'RTE.VALUES' is also removed from the config for the same reason. +# - The filter range is chosen to be the same as the data range for the same reason. 
+# - aliasStack in GeneratorContext is used to put the correct table names into where clauses. + +# grammar syntax +# +# ======Assumptions====== +# 1. Tables have a common dist col +# 2. All operations execute on the common column for all tables. +# +# TODO: RTE_FUNCTION, RTE_TABLEFUNC +# +# ====SYNTAX==== +# ===Nonterminals=== +# Query +# SelectExpr +# FromExpr +# RestrictExpr +# RteList +# Rte +# SubqueryRte +# RelationRte +# JoinList +# JoinOp +# Using +# RestrictList +# Restrict +# CteRte +# CteList +# Cte +# ValuesRte +# Limit +# OrderBy +# +# ===Terminals=== +# e 'SELECT' 'FROM' 'INNER JOIN' 'LEFT JOIN' 'RIGHT JOIN' 'FULL JOIN' 'WHERE' 'LIMIT' 'USING' 'WITH' +# 'ORDER BY' 'VALUES' 'IN' 'NOT' 'AS' '<' '>' '=' '*' ',' ';' '(' ')' +# +# ===Rules=== +# Start -> Query ';' || 'WITH' CteList Query ';' +# Query -> SelectExpr FromExpr [OrderBy] [Limit] || 'SELECT' 'avg(avgsub.DistColName)' 'FROM' SubqueryRte 'AS avgsub' +# SelectExpr -> 'SELECT' 'curAlias()' '.' DistColName +# FromExpr -> 'FROM' (Rte JoinList JoinOp Rte Using || RteList) ['WHERE' 'nextRandomAlias()' '.' DistColName RestrictExpr] +# RestrictExpr -> ('<' || '>' || '=') Int || ['NOT'] 'IN' SubqueryRte +# JoinList -> JoinOp Rte Using JoinList || e +# Using -> 'USING' '(' DistColName ')' +# RteList -> Rte [, RteList] || Rte +# Rte -> SubqueryRte 'AS' 'nextRandomAlias()' || RelationRte 'AS' 'nextRandomAlias()' || +# CteRte 'AS' 'nextRandomAlias()' || ValuesRte 'AS' 'nextRandomAlias()' +# SubqueryRte -> '(' Query ')' +# RelationRte -> 'nextRandomTableName()' +# CteRte -> 'randomCteName()' +# CteList -> Cte [',' CteList] || Cte +# Cte -> 'nextRandomAlias()' 'AS' '(' Query ')' +# ValuesRte -> '(' 'VALUES' '(' 'random()' ')' ')' +# JoinOp -> 'INNER JOIN' || 'LEFT JOIN' || 'RIGHT JOIN' || 'FULL JOIN' +# Limit -> 'LIMIT' 'random()' +# OrderBy -> 'ORDER BY' DistColName +# DistColName -> 'hardwired(get from config)' + + +class GeneratorContext: + """context to store some variables which should be available during generation""" + + def __init__(self): + # each level's last table is used in the WHERE clause for that level + self.aliasStack = [] + # tracks whether we are inside a cte, as we should not refer to a cte inside a cte + self.insideCte = False + # total rtes in cte + non-cte parts + self.totalRteCount = 0 + # rte count in non-cte part to enforce non-cte rte limit + self.currentRteCount = 0 + # cte count to enforce cte limit + self.currentCteCount = 0 + # rte count in cte part to enforce rte limit in cte + self.currentCteRteCount = 0 + # rte counts per table to enforce rte limit per table + self.perTableRtes = {} + # tables which hit the count limit + self.disallowedTables = set() + # tracks avg() usage so that it is only applied at the top-level select + self.usedAvg = False + + def randomCteName(self): + """returns a randomly selected cte name""" + randCteRef = random.randint(0, self.currentCteCount - 1) + return " cte_" + str(randCteRef) + + def curAlias(self): + """returns current alias name to be used for the current table""" + return " table_" + str(self.totalRteCount) + + def curCteAlias(self): + """returns current alias name to be used for the current cte""" + return " cte_" + str(self.currentCteCount) + + def hasAnyCte(self): + """returns whether the context has any cte""" + return self.currentCteCount > 0 + + def canGenerateNewRte(self): + """checks whether a new rte can be generated without exceeding the allowed rte count""" + return self.currentRteCount < getConfig().targetRteCount + + def canGenerateNewCte(self): + """checks whether a new cte can be generated without exceeding the allowed cte count""" + return self.currentCteCount < getConfig().targetCteCount
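+    # Illustration (the numbers are hypothetical, not the shipped defaults): with
+    # targetRteCount=4, targetCteCount=2 and targetCteRteCount=2 in config.yml, the
+    # canGenerateNew* checks allow at most 4 rtes outside ctes, at most 2 ctes, and
+    # at most 2 rtes inside the cte part of a query.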
+ + def canGenerateNewRteInsideCte(self): + """checks if context exceeds allowed rte count inside a cte""" + return self.currentCteRteCount < getConfig().targetCteRteCount + + def addAlias(self, alias): + """adds given alias to the stack""" + self.aliasStack.append(alias) + + def removeLastAlias(self): + """removes the latest added alias from the stack""" + return self.aliasStack.pop() + + def getRteNameEnforcingRteLimits(self): + """returns rteName by enforcing rte count limits for tables""" + # do not enforce per table rte limit if we are inside cte + if self.insideCte: + rteName = random.choice(getAllTableNames()) + return " " + rteName + " " + + while True: + # keep trying to find random table by eliminating the ones which hit table limit + allowedNames = set(getAllTableNames()) - self.disallowedTables + assert len(allowedNames) > 0 + rteName = random.choice(list(allowedNames)) + + # not yet added to rte count map, so we can allow the name + if rteName not in self.perTableRtes: + self.perTableRtes[rteName] = 0 + break + # limit is not exceeded, so we can allow the name + if self.perTableRtes[rteName] < getMaxAllowedCountForTable(rteName): + break + else: + self.disallowedTables.add(rteName) + + # increment rte count for the table name + self.perTableRtes[rteName] += 1 + return " " + rteName + " " + + +def newQuery(): + """returns generated query""" + genCtx = GeneratorContext() + return _start(genCtx) + + +def _start(genCtx): + # Query ';' || 'WITH' CteList Query ';' + query = "" + if not genCtx.canGenerateNewCte() or shouldSelectThatBranch(): + query += _genQuery(genCtx) + else: + genCtx.insideCte = True + query += " WITH " + query += _genCteList(genCtx) + genCtx.insideCte = False + query += _genQuery(genCtx) + query += ";" + return query + + +def _genQuery(genCtx): + # SelectExpr FromExpr [OrderBy] [Limit] || 'SELECT' 'avg(avgsub.DistColName)' 'FROM' SubqueryRte 'AS avgsub' + query = "" + if ( + getConfig().useAvgAtTopLevelTarget + and not genCtx.insideCte + and not genCtx.usedAvg + ): + genCtx.usedAvg = True + query += "SELECT " + query += "count(*), avg(avgsub." + getConfig().commonColName + ") FROM " + query += _genSubqueryRte(genCtx) + query += " AS avgsub" + else: + query += _genSelectExpr(genCtx) + query += _genFromExpr(genCtx) + choseOrderby = False + if getConfig().orderby and shouldSelectThatBranch(): + query += _genOrderBy(genCtx) + choseOrderby = True + if getConfig().limit and shouldSelectThatBranch(): + if not choseOrderby and getConfig().forceOrderbyWithLimit: + query += _genOrderBy(genCtx) + query += _genLimit(genCtx) + return query + + +def _genOrderBy(genCtx): + # 'ORDER BY' DistColName + query = "" + query += " ORDER BY " + query += getConfig().commonColName + " " + return query + + +def _genLimit(genCtx): + # 'LIMIT' 'random()' + query = "" + query += " LIMIT " + (fromVal, toVal) = getConfig().limitRange + query += str(random.randint(fromVal, toVal)) + return query + + +def _genSelectExpr(genCtx): + # 'SELECT' 'curAlias()' '.' DistColName + query = "" + query += " SELECT " + commonColName = getConfig().commonColName + query += genCtx.curAlias() + "." + commonColName + " " + + return query + + +def _genFromExpr(genCtx): + # 'FROM' (Rte JoinList JoinOp Rte Using || RteList) ['WHERE' 'nextRandomAlias()' '.' 
DistColName RestrictExpr] + query = "" + query += " FROM " + + if shouldSelectThatBranch(): + query += _genRte(genCtx) + query += _genJoinList(genCtx) + query += randomJoinOp() + query += _genRte(genCtx) + query += _genUsing(genCtx) + else: + query += _genRteList(genCtx) + + alias = genCtx.removeLastAlias() + if shouldSelectThatBranch(): + query += " WHERE " + query += alias + "." + getConfig().commonColName + query += _genRestrictExpr(genCtx) + return query + + +def _genRestrictExpr(genCtx): + # ('<' || '>' || '=') Int || ['NOT'] 'IN' '(' SubqueryRte ')' + query = "" + if ( + not getConfig().semiAntiJoin + or not genCtx.canGenerateNewRte() + or shouldSelectThatBranch() + ): + query += randomRestrictOp() + (fromVal, toVal) = getConfig().filterRange + query += str(random.randint(fromVal, toVal)) + else: + if shouldSelectThatBranch(): + query += " NOT" + query += " IN " + query += _genSubqueryRte(genCtx) + return query + + +def _genCteList(genCtx): + # Cte [',' CteList] || Cte + query = "" + + if shouldSelectThatBranch(): + query += _genCte(genCtx) + if not genCtx.canGenerateNewCte(): + return query + query += "," + query += _genCteList(genCtx) + else: + query += _genCte(genCtx) + return query + + +def _genCte(genCtx): + # 'nextRandomAlias()' 'AS' '(' Query ')' + query = "" + query += genCtx.curCteAlias() + genCtx.currentCteCount += 1 + query += " AS " + query += " ( " + query += _genQuery(genCtx) + query += " ) " + return query + + +def _genRteList(genCtx): + # RteList -> Rte [, RteList] || Rte + query = "" + if getConfig().cartesianProduct and shouldSelectThatBranch(): + query += _genRte(genCtx) + if not genCtx.canGenerateNewRte(): + return query + if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte(): + return query + query += "," + query += _genRteList(genCtx) + else: + query += _genRte(genCtx) + return query + + +def _genJoinList(genCtx): + # JoinOp Rte Using JoinList || e + query = "" + + if shouldSelectThatBranch(): + if not genCtx.canGenerateNewRte(): + return query + if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte(): + return query + query += randomJoinOp() + query += _genRte(genCtx) + query += _genUsing(genCtx) + query += _genJoinList(genCtx) + return query + + +def _genUsing(genCtx): + # 'USING' '(' DistColName ')' + query = "" + query += " USING (" + getConfig().commonColName + " ) " + return query + + +def _genRte(genCtx): + # SubqueryRte as 'nextRandomAlias()' || RelationRte as 'curAlias()' || + # CteRte as 'curAlias()' || ValuesRte 'AS' 'nextRandomAlias()' + alias = genCtx.curAlias() + modifiedAlias = None + + if genCtx.insideCte: + genCtx.currentCteRteCount += 1 + else: + genCtx.currentRteCount += 1 + genCtx.totalRteCount += 1 + + # donot dive into recursive subquery further if we hit into rte limit, replace it with relation rte + rteType = randomRteType() + if not genCtx.canGenerateNewRte(): + rteType = RTEType.RELATION + + # donot dive into recursive subquery further if we hit into rte in cte limit, replace it with relation rte + if genCtx.insideCte and not genCtx.canGenerateNewRteInsideCte(): + rteType = RTEType.RELATION + + # we cannot refer to cte if we are inside it or we donot have any cte + if (genCtx.insideCte or not genCtx.hasAnyCte()) and rteType == RTEType.CTE: + rteType = RTEType.RELATION + + query = "" + if rteType == RTEType.SUBQUERY: + query += _genSubqueryRte(genCtx) + elif rteType == RTEType.RELATION: + query += _genRelationRte(genCtx) + elif rteType == RTEType.CTE: + query += _genCteRte(genCtx) + elif rteType == RTEType.VALUES: + 
query += _genValuesRte(genCtx) + modifiedAlias = alias + "(" + getConfig().commonColName + ") " + else: + raise BaseException("unknown RTE type") + + query += " AS " + query += alias if not modifiedAlias else modifiedAlias + genCtx.addAlias(alias) + + return query + + +def _genSubqueryRte(genCtx): + # '(' Query ')' + query = "" + query += " ( " + query += _genQuery(genCtx) + query += " ) " + return query + + +def _genRelationRte(genCtx): + # 'randomAllowedTableName()' + query = "" + query += genCtx.getRteNameEnforcingRteLimits() + return query + + +def _genCteRte(genCtx): + # 'randomCteName()' + query = "" + query += genCtx.randomCteName() + return query + + +def _genValuesRte(genCtx): + # '( VALUES(random()) )' + query = "" + (fromVal, toVal) = getConfig().dataRange + query += " ( VALUES(" + str(random.randint(fromVal, toVal)) + " ) ) " + return query diff --git a/src/test/regress/citus_tests/query_generator/random_selections.py b/src/test/regress/citus_tests/query_generator/random_selections.py new file mode 100644 index 000000000..188107753 --- /dev/null +++ b/src/test/regress/citus_tests/query_generator/random_selections.py @@ -0,0 +1,39 @@ +import random + +from node_defs import RestrictOp + +from config.config import getConfig + + +def shouldSelectThatBranch(): + """returns 0 or 1 randomly""" + return random.randint(0, 1) + + +def randomRteType(): + """returns a randomly selected RteType given at config""" + rtes = getConfig().targetRteTypes + return random.choice(rtes) + + +def randomJoinOp(): + """returns a randomly selected JoinOp given at config""" + joinTypes = getConfig().targetJoinTypes + return " " + random.choice(joinTypes).name + " JOIN" + + +def randomRestrictOp(): + """returns a randomly selected RestrictOp given at config""" + restrictOps = getConfig().targetRestrictOps + restrictOp = random.choice(restrictOps) + opText = "" + if restrictOp == RestrictOp.LT: + opText = "<" + elif restrictOp == RestrictOp.GT: + opText = ">" + elif restrictOp == RestrictOp.EQ: + opText = "=" + else: + raise BaseException("Unknown restrict op") + + return " " + opText + " "
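For reference, the Table and Column classes in node_defs.py above describe the target tables the generator works with. The following is only a sketch of such an entry; the names and values are hypothetical, the field meanings are inferred from how the generator uses them, and presumably config/config.py materializes these objects from config.yml:

from node_defs import CitusType, Column, Table

# hypothetical distributed target table with a single int column
exampleTable = Table(
    name="dist_table",
    citusType=CitusType.DISTRIBUTED,
    maxAllowedUseOnQuery=2,  # per-query usage limit (see getRteNameEnforcingRteLimits)
    rowCount=10,             # data generation knob consumed by data_gen.py
    nullRate=0.1,            # data generation knob consumed by data_gen.py
    duplicateRate=0.1,       # data generation knob consumed by data_gen.py
    columns=[Column("id", "int")],
    distinctCopyCount=1,     # number of copies of the table definition (assumed meaning)
)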
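Finally, a minimal sketch of how these modules can be driven by hand for ad-hoc experimentation. generate_queries.py above remains the real entry point; this assumes it is run from the query_generator directory so that the config module can locate its configuration:

from data_gen import getTableData
from ddl_gen import getTableDDLs
from query_gen import newQuery

from config.config import resetConfig

resetConfig()             # load the generator configuration, as generate_queries.py does
print(getTableDDLs())     # DROP/CREATE TABLE plus create_distributed_table/create_reference_table calls
print(getTableData())     # INSERT statements that populate the tables
for _ in range(3):        # a few random queries produced from the grammar
    print(newQuery())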