
Commit d42d39f

tests: build and run benchmark tests in each branch
In the previous implementation, the same binary that was built in the PR branch was executed twice, which was not a correct A/B test. This has been fixed.

Signed-off-by: Tomoya Iwata <iwata.tomoya@classmethod.jp>
1 parent a35b63e commit d42d39f
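The commit's premise is that a correct A/B test must build and run the benchmarks once per branch, rather than reusing the PR-branch binary for both sides. Below is a minimal sketch of that contract, using only the callback signatures visible in the diff further down; `git_ab_test_sketch` and its checkout arguments are illustrative stand-ins, not the test framework's actual `git_ab_test` implementation.

from pathlib import Path

# Illustrative stand-in for the framework's git_ab_test helper (assumption:
# the real helper prepares one checkout per branch before calling back).
def git_ab_test_sketch(run_criterion, compare_results, checkout_a: Path, checkout_b: Path):
    # Build and run the benchmarks inside each branch's own checkout, so each
    # side measures the code that branch actually contains.
    baselines_a = run_criterion(checkout_a, True)   # "A": the PR's target branch
    baselines_b = run_criterion(checkout_b, False)  # "B": HEAD of the PR branch
    # Hand both criterion baseline directories to the comparison callback.
    compare_results(baselines_a, baselines_b)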

File tree

1 file changed


tests/integration_tests/performance/test_benchmarks.py

Lines changed: 37 additions & 49 deletions
@@ -2,12 +2,12 @@
 # SPDX-License-Identifier: Apache-2.0
 """Optional benchmarks-do-not-regress test"""
 import contextlib
-import json
 import logging
 import platform
 import re
 import shutil
 from pathlib import Path
+from typing import Callable, List

 import pytest

@@ -18,43 +18,40 @@
 LOGGER = logging.getLogger(__name__)


-def get_executables():
+def get_benchmark_names() -> List[str]:
     """
-    Get a list of binaries for benchmarking
+    Get a list of benchmark test names
     """

-    # Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
-    # usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
-    # extract the path to the compiled benchmark binary.
     _, stdout, _ = cargo(
         "bench",
-        f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
+        f"--workspace --quiet --target {platform.machine()}-unknown-linux-musl",
+        "--list",
     )

-    executables = []
-    for line in stdout.splitlines():
-        if line:
-            msg = json.loads(line)
-            executable = msg.get("executable")
-            if executable:
-                executables.append(executable)
+    # Format a string like `page_fault #2: benchmark` to a string like `page_fault`.
+    benchmark_names = [
+        re.sub(r"\s#([0-9]*)", "", i.split(":")[0])
+        for i in stdout.split("\n")
+        if i.endswith(": benchmark")
+    ]

-    return executables
+    return list(set(benchmark_names))


 @pytest.mark.no_block_pr
-@pytest.mark.parametrize("executable", get_executables())
-def test_no_regression_relative_to_target_branch(executable):
+@pytest.mark.parametrize("benchname", get_benchmark_names())
+def test_no_regression_relative_to_target_branch(benchname):
     """
     Run the microbenchmarks in this repository, comparing results from pull
     request target branch against what's achieved on HEAD
     """
-    run_criterion = get_run_criterion(executable)
-    compare_results = get_compare_results(executable)
+    run_criterion = get_run_criterion(benchname)
+    compare_results = get_compare_results(benchname)
     git_ab_test(run_criterion, compare_results)


-def get_run_criterion(executable):
+def get_run_criterion(benchmark_name) -> Callable[[Path, bool], Path]:
     """
     Get function that executes specified benchmarks, and running them pinned to some CPU
     """
@@ -64,62 +61,53 @@ def _run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:

         with contextlib.chdir(firecracker_checkout):
             utils.check_output(
-                f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
+                f"taskset -c 1 cargo bench --workspace --quiet -- {benchmark_name} --exact --save-baseline {baseline_name}"
             )

         return firecracker_checkout / "build" / "cargo_target" / "criterion"

     return _run_criterion


-def get_compare_results(executable):
+def get_compare_results(benchmark_name) -> Callable[[Path, Path], None]:
     """
     Get function that compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main
     """

     def _compare_results(location_a_baselines: Path, location_b_baselines: Path):

-        list_result = utils.check_output(
-            f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --list"
+        _, stdout, _ = cargo(
+            "bench",
+            f"--workspace --target {platform.machine()}-unknown-linux-musl --quiet",
+            f"--exact {benchmark_name} --list",
         )

         # Format a string like `page_fault #2: benchmark` to a string like `page_fault_2`.
         # Because under `cargo_target/criterion/`, a directory like `page_fault_2` will create.
-        bench_marks = [
-            re.sub(r"\s#(?P<sub_id>[1-9]+)", r"_\g<sub_id>", i.split(":")[0])
-            for i in list_result.stdout.split("\n")
+        bench_mark_targets = [
+            re.sub(r"\s#(?P<sub_id>[0-9]*)", r"_\g<sub_id>", i.split(":")[0])
+            for i in stdout.split("\n")
             if i.endswith(": benchmark")
         ]

-        for benchmark in bench_marks:
-            data = json.loads(
-                (
-                    location_b_baselines / benchmark / "b_baseline" / "estimates.json"
-                ).read_text("utf-8")
-            )
-
-            average_ns = data["mean"]["point_estimate"]
-
-            LOGGER.info("%s mean: %iµs", benchmark, average_ns / 1000)
-
-        # Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
-        # to do the comparison
-
-        for benchmark in bench_marks:
+        # If benchmark test has multiple targets, the results of a single benchmark test will be output to multiple directories.
+        # For example, `page_fault` and `page_fault_2`.
+        # We need copy benchmark results each directories.
+        for bench_mark_target in bench_mark_targets:
             shutil.copytree(
-                location_a_baselines / benchmark / "a_baseline",
-                location_b_baselines / benchmark / "a_baseline",
+                location_a_baselines / bench_mark_target / "a_baseline",
+                location_b_baselines / bench_mark_target / "a_baseline",
             )

-        bench_result = utils.check_output(
-            f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --baseline a_baseline --load-baseline b_baseline",
-            True,
-            Path.cwd().parent,
+        _, stdout, _ = cargo(
+            "bench",
+            f"--workspace --target {platform.machine()}-unknown-linux-musl",
+            f"{benchmark_name} --baseline a_baseline --load-baseline b_baseline",
        )

         regressions_only = "\n\n".join(
             result
-            for result in bench_result.stdout.split("\n\n")
+            for result in stdout.split("\n\n")
             if "Performance has regressed." in result
         )

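For reference, the two regular expressions introduced above turn one line of `cargo bench -- --list` output into the two names the test needs. A standalone demonstration follows; the sample line is taken from the comments in the diff, not captured from a real run.

import re

# Sample harness output line for a multi-target benchmark, as quoted in the
# diff's comments.
line = "page_fault #2: benchmark"
name = line.split(":")[0]  # "page_fault #2"

# get_benchmark_names(): drop the " #N" suffix so pytest parametrizes one test
# per benchmark name.
print(re.sub(r"\s#([0-9]*)", "", name))  # page_fault

# get_compare_results(): rewrite " #N" as "_N" to match the directory name
# criterion creates under build/cargo_target/criterion/.
print(re.sub(r"\s#(?P<sub_id>[0-9]*)", r"_\g<sub_id>", name))  # page_fault_2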