Query Generator Seed (#6883)

- Accept a seed number as an argument to the query generator so that a
previous run can be reproduced.
- Expose the difference between the results, if any, as an artifact on CI.
pull/6907/head
aykut-bozkurt 2023-05-03 15:54:11 +03:00 committed by GitHub
parent e444dd4f3f
commit 2d005ac777
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 62 additions and 14 deletions

View File

@ -474,6 +474,9 @@ jobs:
- store_artifacts:
name: 'Save dmls'
path: src/test/regress/citus_tests/query_generator/out/queries.sql
- store_artifacts:
name: 'Save diffs'
path: src/test/regress/citus_tests/query_generator/out/local_dist.diffs
- stack_trace
- coverage:
flags: 'test_<< parameters.pg_major >>,querygen'

View File

@ -264,7 +264,7 @@ check-pytest:
pytest -n auto
check-query-generator: all
${query_generator_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir)
${query_generator_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir) --seed=$(seed)
check-citus-upgrade: all
$(citus_upgrade_check) \

View File

@ -20,10 +20,12 @@ citus_dev make testCluster --destroy
2. Run the test,
```bash
cd src/test/regress/citus_tests/query_generator/bin
bash citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port>
bash citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port> [<seed>]
```
3. See the diff content in `src/test/regress/citus_tests/query_generator/out/local_dist.diffs`
Note: the optional `seed` argument reproduces a previous run of the Citus test: the same seed generates the same queries and, therefore, the same results.
### Configuration
You can configure 3 different parts:

View File

@ -6,6 +6,7 @@ set -euo pipefail
psql_user=$1
psql_db=$2
psql_port=$3
seed=${4:-""}
runDDLs()
{
@ -39,7 +40,7 @@ showDiffs()
# run query generator and let it create output ddls and queries
script_folder=$(dirname "$0")
out_folder="${script_folder}"/../out
pushd . && cd "${script_folder}"/.. && python3 generate_queries.py && popd
pushd . && cd "${script_folder}"/.. && python3 generate_queries.py --seed="${seed}" && popd
# remove result files if exists
rm -rf "${out_folder}"/dist_queries.out "${out_folder}"/local_queries.out

View File

@ -2,11 +2,12 @@
"""query_gen_test
Usage:
run_query_compare_test --bindir=<bindir> --pgxsdir=<pgxsdir>
run_query_compare_test --bindir=<bindir> --pgxsdir=<pgxsdir> --seed=<seed>
Options:
--bindir=<bindir> PostgreSQL executable directory(ex: '~/.pgenv/pgsql-10.4/bin')
--pgxsdir=<pgxsdir> Path to the PGXS directory(ex: ~/.pgenv/src/postgresql-11.3)
--seed=<seed> Seed number used by the query generator.(ex: 123)
"""
import os
@ -27,7 +28,7 @@ import common # noqa: E402
import config as cfg # noqa: E402
def run_test(config):
def run_test(config, seed):
# start cluster
common.initialize_temp_dir(cfg.CITUS_ARBITRARY_TEST_DIR)
common.initialize_citus_cluster(
@ -36,8 +37,8 @@ def run_test(config):
# run test
scriptDirPath = os.path.dirname(os.path.abspath(__file__))
testRunCommand = "bash {}/citus_compare_dist_local_joins.sh {} {} {}".format(
scriptDirPath, config.user, config.dbname, config.coordinator_port()
testRunCommand = "bash {}/citus_compare_dist_local_joins.sh {} {} {} {}".format(
scriptDirPath, config.user, config.dbname, config.coordinator_port(), seed
)
process = subprocess.Popen(
testRunCommand.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
@ -56,8 +57,10 @@ def run_test(config):
if __name__ == "__main__":
citusClusterConfig = cfg.CitusSuperUserDefaultClusterConfig(
docopt(__doc__, version="run_query_compare_test")
)
arguments = docopt(__doc__, version="run_query_compare_test")
citusClusterConfig = cfg.CitusSuperUserDefaultClusterConfig(arguments)
run_test(citusClusterConfig)
seed = ""
if "--seed" in arguments and arguments["--seed"] != "":
seed = arguments["--seed"]
run_test(citusClusterConfig, seed)

View File

@ -1,4 +1,5 @@
import copy
import os
import yaml
from config.config_parser import (
@ -13,7 +14,9 @@ from node_defs import CitusType
class Config:
def __init__(self):
configObj = Config.parseConfigFile("config/config.yaml")
configObj = Config.parseConfigFile(
f"{os.path.dirname(os.path.abspath(__file__))}/config.yaml"
)
self.targetTables = _distinctCopyTables(
parseTableArray(configObj["targetTables"])

View File

@ -12,6 +12,7 @@ def getTableData():
for table in tables:
# generate base rows
dataGenerationSql += _genOverlappingData(table.name, fromVal, table.rowCount)
dataGenerationSql += "\n"
dataGenerationSql += _genNonOverlappingData(table.name, toVal, tableIdx)
dataGenerationSql += "\n"

View File

@ -1,9 +1,23 @@
#!/usr/bin/env python3
"""generate_queries
Usage:
generate_queries --seed=<seed>
Options:
--seed=<seed> Seed number used by the query generator.(ex: 123)
"""
import os
import random
import signal
import sys
from data_gen import getTableData
from ddl_gen import getTableDDLs
from docopt import docopt
from query_gen import newQuery
from random_selections import currentMilliSecs
from config.config import getConfig, resetConfig
@ -29,12 +43,16 @@ def _interactiveMode(ddls, data):
def _fileMode(ddls, data):
ddlFileName = "out/" + getConfig().ddlOutFile
ddlFileName = (
f"{os.path.dirname(os.path.abspath(__file__))}/out/{getConfig().ddlOutFile}"
)
with open(ddlFileName, "w") as ddlFile:
ddlFile.writelines([ddls, data])
queryCount = getConfig().queryCount
fileName = "out/" + getConfig().queryOutFile
fileName = (
f"{os.path.dirname(os.path.abspath(__file__))}/out/{getConfig().queryOutFile}"
)
with open(fileName, "w") as f:
# enable repartition joins due to https://github.com/citusdata/citus/issues/6865
enableRepartitionJoinCommand = "SET citus.enable_repartition_joins TO on;\n"
@ -54,6 +72,17 @@ def _fileMode(ddls, data):
if __name__ == "__main__":
signal.signal(signal.SIGINT, _signal_handler)
arguments = docopt(__doc__, version="generate_queries")
seed = -1
if "--seed" in arguments and arguments["--seed"] != "":
seed = int(arguments["--seed"])
else:
seed = currentMilliSecs()
assert seed > 0
random.seed(seed)
print(f"---SEED: {seed} ---")
resetConfig()
ddls = getTableDDLs()

View File

@ -1,4 +1,5 @@
import random
import time
from node_defs import RestrictOp
@ -10,6 +11,11 @@ def shouldSelectThatBranch():
return random.randint(0, 1)
def currentMilliSecs():
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    # time.time() reports seconds as a float; scale to milliseconds, then
    # round to the nearest integer.
    epochSeconds = time.time()
    return round(epochSeconds * 1000)
def randomRteType():
"""returns a randomly selected RteType given at config"""
rtes = getConfig().targetRteTypes