Query Generator Seed (#6883)

- Give seed number as argument to query generator to reproduce a
previous run.
- Expose the difference between results, if any, as artifact on CI.
pull/6907/head
aykut-bozkurt 2023-05-03 15:54:11 +03:00 committed by GitHub
parent e444dd4f3f
commit 2d005ac777
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 62 additions and 14 deletions

View File

@ -474,6 +474,9 @@ jobs:
- store_artifacts: - store_artifacts:
name: 'Save dmls' name: 'Save dmls'
path: src/test/regress/citus_tests/query_generator/out/queries.sql path: src/test/regress/citus_tests/query_generator/out/queries.sql
- store_artifacts:
name: 'Save diffs'
path: src/test/regress/citus_tests/query_generator/out/local_dist.diffs
- stack_trace - stack_trace
- coverage: - coverage:
flags: 'test_<< parameters.pg_major >>,querygen' flags: 'test_<< parameters.pg_major >>,querygen'

View File

@ -264,7 +264,7 @@ check-pytest:
pytest -n auto pytest -n auto
check-query-generator: all check-query-generator: all
${query_generator_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir) ${query_generator_check} --bindir=$(bindir) --pgxsdir=$(pgxsdir) --seed=$(seed)
check-citus-upgrade: all check-citus-upgrade: all
$(citus_upgrade_check) \ $(citus_upgrade_check) \

View File

@ -20,10 +20,12 @@ citus_dev make testCluster --destroy
2. Run the test, 2. Run the test,
```bash ```bash
cd src/test/regress/citus_tests/query_generator/bin cd src/test/regress/citus_tests/query_generator/bin
bash citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port> bash citus_compare_dist_local_joins.sh <username> <dbname> <coordinator_port> Optional:<seed>
``` ```
3. See the diff content in `src/test/regress/citus_tests/query_generator/out/local_dist_diffs` 3. See the diff content in `src/test/regress/citus_tests/query_generator/out/local_dist_diffs`
Note: `seed` is optional. Passing the seed printed by an earlier run makes the generator produce the same queries, and therefore the same results, so that run can be reproduced.
### Configuration ### Configuration
You can configure 3 different parts: You can configure 3 different parts:

View File

@ -6,6 +6,7 @@ set -euo pipefail
psql_user=$1 psql_user=$1
psql_db=$2 psql_db=$2
psql_port=$3 psql_port=$3
seed=${4:-""}
runDDLs() runDDLs()
{ {
@ -39,7 +40,7 @@ showDiffs()
# run query generator and let it create output ddls and queries # run query generator and let it create output ddls and queries
script_folder=$(dirname "$0") script_folder=$(dirname "$0")
out_folder="${script_folder}"/../out out_folder="${script_folder}"/../out
pushd . && cd "${script_folder}"/.. && python3 generate_queries.py && popd pushd . && cd "${script_folder}"/.. && python3 generate_queries.py --seed="${seed}" && popd
# remove result files if exists # remove result files if exists
rm -rf "${out_folder}"/dist_queries.out "${out_folder}"/local_queries.out rm -rf "${out_folder}"/dist_queries.out "${out_folder}"/local_queries.out

View File

@ -2,11 +2,12 @@
"""query_gen_test """query_gen_test
Usage: Usage:
run_query_compare_test --bindir=<bindir> --pgxsdir=<pgxsdir> run_query_compare_test --bindir=<bindir> --pgxsdir=<pgxsdir> --seed=<seed>
Options: Options:
--bindir=<bindir> PostgreSQL executable directory(ex: '~/.pgenv/pgsql-10.4/bin') --bindir=<bindir> PostgreSQL executable directory(ex: '~/.pgenv/pgsql-10.4/bin')
--pgxsdir=<pgxsdir> Path to the PGXS directory(ex: ~/.pgenv/src/postgresql-11.3) --pgxsdir=<pgxsdir> Path to the PGXS directory(ex: ~/.pgenv/src/postgresql-11.3)
--seed=<seed> Seed number used by the query generator.(ex: 123)
""" """
import os import os
@ -27,7 +28,7 @@ import common # noqa: E402
import config as cfg # noqa: E402 import config as cfg # noqa: E402
def run_test(config): def run_test(config, seed):
# start cluster # start cluster
common.initialize_temp_dir(cfg.CITUS_ARBITRARY_TEST_DIR) common.initialize_temp_dir(cfg.CITUS_ARBITRARY_TEST_DIR)
common.initialize_citus_cluster( common.initialize_citus_cluster(
@ -36,8 +37,8 @@ def run_test(config):
# run test # run test
scriptDirPath = os.path.dirname(os.path.abspath(__file__)) scriptDirPath = os.path.dirname(os.path.abspath(__file__))
testRunCommand = "bash {}/citus_compare_dist_local_joins.sh {} {} {}".format( testRunCommand = "bash {}/citus_compare_dist_local_joins.sh {} {} {} {}".format(
scriptDirPath, config.user, config.dbname, config.coordinator_port() scriptDirPath, config.user, config.dbname, config.coordinator_port(), seed
) )
process = subprocess.Popen( process = subprocess.Popen(
testRunCommand.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE testRunCommand.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
@ -56,8 +57,10 @@ def run_test(config):
if __name__ == "__main__": if __name__ == "__main__":
citusClusterConfig = cfg.CitusSuperUserDefaultClusterConfig( arguments = docopt(__doc__, version="run_query_compare_test")
docopt(__doc__, version="run_query_compare_test") citusClusterConfig = cfg.CitusSuperUserDefaultClusterConfig(arguments)
)
run_test(citusClusterConfig) seed = ""
if "--seed" in arguments and arguments["--seed"] != "":
seed = arguments["--seed"]
run_test(citusClusterConfig, seed)

View File

@ -1,4 +1,5 @@
import copy import copy
import os
import yaml import yaml
from config.config_parser import ( from config.config_parser import (
@ -13,7 +14,9 @@ from node_defs import CitusType
class Config: class Config:
def __init__(self): def __init__(self):
configObj = Config.parseConfigFile("config/config.yaml") configObj = Config.parseConfigFile(
f"{os.path.dirname(os.path.abspath(__file__))}/config.yaml"
)
self.targetTables = _distinctCopyTables( self.targetTables = _distinctCopyTables(
parseTableArray(configObj["targetTables"]) parseTableArray(configObj["targetTables"])

View File

@ -12,6 +12,7 @@ def getTableData():
for table in tables: for table in tables:
# generate base rows # generate base rows
dataGenerationSql += _genOverlappingData(table.name, fromVal, table.rowCount) dataGenerationSql += _genOverlappingData(table.name, fromVal, table.rowCount)
dataGenerationSql += "\n"
dataGenerationSql += _genNonOverlappingData(table.name, toVal, tableIdx) dataGenerationSql += _genNonOverlappingData(table.name, toVal, tableIdx)
dataGenerationSql += "\n" dataGenerationSql += "\n"

View File

@ -1,9 +1,23 @@
#!/usr/bin/env python3
"""generate_queries
Usage:
generate_queries --seed=<seed>
Options:
--seed=<seed> Seed number used by the query generator.(ex: 123)
"""
import os
import random
import signal import signal
import sys import sys
from data_gen import getTableData from data_gen import getTableData
from ddl_gen import getTableDDLs from ddl_gen import getTableDDLs
from docopt import docopt
from query_gen import newQuery from query_gen import newQuery
from random_selections import currentMilliSecs
from config.config import getConfig, resetConfig from config.config import getConfig, resetConfig
@ -29,12 +43,16 @@ def _interactiveMode(ddls, data):
def _fileMode(ddls, data): def _fileMode(ddls, data):
ddlFileName = "out/" + getConfig().ddlOutFile ddlFileName = (
f"{os.path.dirname(os.path.abspath(__file__))}/out/{getConfig().ddlOutFile}"
)
with open(ddlFileName, "w") as ddlFile: with open(ddlFileName, "w") as ddlFile:
ddlFile.writelines([ddls, data]) ddlFile.writelines([ddls, data])
queryCount = getConfig().queryCount queryCount = getConfig().queryCount
fileName = "out/" + getConfig().queryOutFile fileName = (
f"{os.path.dirname(os.path.abspath(__file__))}/out/{getConfig().queryOutFile}"
)
with open(fileName, "w") as f: with open(fileName, "w") as f:
# enable repartition joins due to https://github.com/citusdata/citus/issues/6865 # enable repartition joins due to https://github.com/citusdata/citus/issues/6865
enableRepartitionJoinCommand = "SET citus.enable_repartition_joins TO on;\n" enableRepartitionJoinCommand = "SET citus.enable_repartition_joins TO on;\n"
@ -54,6 +72,17 @@ def _fileMode(ddls, data):
if __name__ == "__main__": if __name__ == "__main__":
signal.signal(signal.SIGINT, _signal_handler) signal.signal(signal.SIGINT, _signal_handler)
arguments = docopt(__doc__, version="generate_queries")
seed = -1
if "--seed" in arguments and arguments["--seed"] != "":
seed = int(arguments["--seed"])
else:
seed = currentMilliSecs()
assert seed > 0
random.seed(seed)
print(f"---SEED: {seed} ---")
resetConfig() resetConfig()
ddls = getTableDDLs() ddls = getTableDDLs()

View File

@ -1,4 +1,5 @@
import random import random
import time
from node_defs import RestrictOp from node_defs import RestrictOp
@ -10,6 +11,11 @@ def shouldSelectThatBranch():
return random.randint(0, 1) return random.randint(0, 1)
def currentMilliSecs():
"""Return the current time as milliseconds since the epoch: time.time() * 1000 rounded to the nearest integer."""
return round(time.time() * 1000)
def randomRteType(): def randomRteType():
"""returns a randomly selected RteType given at config""" """returns a randomly selected RteType given at config"""
rtes = getConfig().targetRteTypes rtes = getConfig().targetRteTypes