diff --git a/tests/integration_tests/performance/test_benchmarks.py b/tests/integration_tests/performance/test_benchmarks.py
deleted file mode 100644
index 6e6541a688d..00000000000
--- a/tests/integration_tests/performance/test_benchmarks.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-# SPDX-License-Identifier: Apache-2.0
-"""Optional benchmarks-do-not-regress test"""
-import contextlib
-import json
-import logging
-import platform
-import shutil
-from pathlib import Path
-
-import pytest
-
-from framework import utils
-from framework.ab_test import git_ab_test
-from host_tools.cargo_build import cargo
-
-LOGGER = logging.getLogger(__name__)
-
-
-@pytest.mark.no_block_pr
-@pytest.mark.timeout(900)
-def test_no_regression_relative_to_target_branch():
-    """
-    Run the microbenchmarks in this repository, comparing results from pull
-    request target branch against what's achieved on HEAD
-    """
-    git_ab_test(run_criterion, compare_results)
-
-
-def run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
-    """
-    Executes all benchmarks by running "cargo bench --no-run", finding the executables, and running them pinned to some CPU
-    """
-    baseline_name = "a_baseline" if is_a else "b_baseline"
-
-    with contextlib.chdir(firecracker_checkout):
-        # Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
-        # usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
-        # extract the path to the compiled benchmark binary.
-        _, stdout, _ = cargo(
-            "bench",
-            f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
-        )
-
-        executables = []
-        for line in stdout.split("\n"):
-            if line:
-                msg = json.loads(line)
-                executable = msg.get("executable")
-                if executable:
-                    executables.append(executable)
-
-        for executable in executables:
-            utils.check_output(
-                f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
-            )
-
-    return firecracker_checkout / "build" / "cargo_target" / "criterion"
-
-
-def compare_results(location_a_baselines: Path, location_b_baselines: Path):
-    """Compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main"""
-    for benchmark in location_b_baselines.glob("*"):
-        data = json.loads(
-            (benchmark / "b_baseline" / "estimates.json").read_text("utf-8")
-        )
-
-        average_ns = data["mean"]["point_estimate"]
-
-        LOGGER.info("%s mean: %iµs", benchmark.name, average_ns / 1000)
-
-    # Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
-    # to do the comparison
-    for benchmark in location_a_baselines.glob("*"):
-        shutil.copytree(
-            benchmark / "a_baseline",
-            location_b_baselines / benchmark.name / "a_baseline",
-        )
-
-    _, stdout, _ = cargo(
-        "bench",
-        f"--all --target {platform.machine()}-unknown-linux-musl",
-        "--baseline a_baseline --load-baseline b_baseline",
-    )
-
-    regressions_only = "\n\n".join(
-        result
-        for result in stdout.split("\n\n")
-        if "Performance has regressed." in result
-    )
-
-    # If this string is anywhere in stdout, then at least one of our benchmarks
-    # is now performing worse with the PR changes.
-    assert not regressions_only, "\n" + regressions_only