diff --git a/README.md b/README.md index f541ea6c..11201d4d 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,17 @@ While benchmarking an indexing side change, you might want to recreate the index python src/python/localrun.py -source wikimediumall -r ``` +For quick patch testing, you can control the number of JVM iterations and query repetitions to speed up the benchmark: +```bash +# Quick test: 5 JVM iterations, 10 query repetitions per JVM +python src/python/localrun.py -source wikimediumall -iterations 5 -warmups 10 + +# Full benchmark (default): 20 JVM iterations, 20 query repetitions per JVM +python src/python/localrun.py -source wikimediumall -iterations 20 -warmups 20 +``` + +**Note:** The `-iterations` parameter controls how many separate JVM processes are launched (default: 20), and `-warmups` controls how many times each query runs within a single JVM (default: 20). Running with default settings (20×20) provides the most statistically reliable results and is recommended for benchmark testing to get a complete picture. For quick patch validation, reducing these values significantly speeds up testing. + For details on all the available options, use the `-h` or `--help` parameter. 
# Running the geo benchmark diff --git a/src/python/example.py b/src/python/example.py index b064d6b7..33b6b4c9 100755 --- a/src/python/example.py +++ b/src/python/example.py @@ -29,12 +29,14 @@ parser.add_argument("-b", "--baseline", default=os.environ.get("BASELINE") or "lucene_baseline", help="Path to lucene repo to be used for baseline") parser.add_argument("-c", "--candidate", default=os.environ.get("CANDIDATE") or "lucene_candidate", help="Path to lucene repo to be used for candidate") parser.add_argument("-r", "--reindex", action="store_true", help="Reindex data for candidate run") + parser.add_argument("-iterations", "--iterations", default=20, type=int, help="Number of JVM iterations (separate JVM processes, default: 20)") + parser.add_argument("-warmups", "--warmups", default=20, type=int, help="Number of times each query runs within a single JVM for warmup (default: 20)") args = parser.parse_args() print("Running benchmarks with the following args: %s" % args) sourceData = competition.sourceData(args.source) countsAreCorrect = args.searchConcurrency != 0 - comp = competition.Competition(verifyCounts=not countsAreCorrect) + comp = competition.Competition(verifyCounts=not countsAreCorrect, jvmCount=args.iterations, taskRepeatCount=args.warmups) index = comp.newIndex( args.baseline, diff --git a/src/python/nightlyBench.py b/src/python/nightlyBench.py index 2a93b59d..92b3bca0 100644 --- a/src/python/nightlyBench.py +++ b/src/python/nightlyBench.py @@ -151,7 +151,7 @@ def buildIndex(r, runLogDir, desc, index, logFile): for tool in ("vmstat", "top"): logFileName = f"{constants.LOGS_DIR}/nightly.{tool}.log" if os.path.exists(logFileName): - print(f'remove pre-existing log file {logFileName}') + print(f"remove pre-existing log file {logFileName}") os.remove(logFileName) # aggregate at multiple stack depths so we can see patterns like "new BytesRef() is costly regardless of context", for example: