Skip to content

Commit f66dd13

Browse files
committed
refactor: move check_regression into ab_test.py
This function was only used in ab_test.py. Signed-off-by: Egor Lazarchuk <yegorlz@amazon.co.uk>
1 parent cc83572 commit f66dd13

File tree

2 files changed

+31
-31
lines changed

2 files changed

+31
-31
lines changed

tests/framework/ab_test.py

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,9 @@
2121
of both invocations is the same, the test passes (with us being alerted to this situation via a special pipeline that
2222
does not block PRs). If not, it fails, preventing PRs from introducing new vulnerable dependencies.
2323
"""
24-
import statistics
2524
from pathlib import Path
2625
from tempfile import TemporaryDirectory
27-
from typing import Callable, List, Optional, TypeVar
28-
29-
import scipy
26+
from typing import Callable, Optional, TypeVar
3027

3128
from framework import utils
3229
from framework.defs import FC_WORKSPACE_DIR
@@ -170,32 +167,6 @@ def set_did_not_grow_comparator(
170167
)
171168

172169

173-
def check_regression(
174-
a_samples: List[float], b_samples: List[float], *, n_resamples: int = 9999
175-
):
176-
"""Checks for a regression by performing a permutation test. A permutation test is a non-parametric test that takes
177-
three parameters: Two populations (sets of samples) and a function computing a "statistic" based on two populations.
178-
First, the test computes the statistic for the initial populations. It then randomly
179-
permutes the two populations (e.g. merges them and then randomly splits them again). For each such permuted
180-
population, the statistic is computed. Then, all the statistics are sorted, and the percentile of the statistic for the
181-
initial populations is computed. We then look at the fraction of statistics that are larger/smaller than that of the
182-
initial populations. The minimum of these two fractions will then become the p-value.
183-
184-
The idea is that if the two populations are indeed drawn from the same distribution (e.g. if performance did not
185-
change), then permuting will not affect the statistic (indeed, it should be approximately normal-distributed, and
186-
the statistic for the initial populations will be somewhere "in the middle").
187-
188-
Useful for performance tests.
189-
"""
190-
return scipy.stats.permutation_test(
191-
(a_samples, b_samples),
192-
# Compute the difference of means, such that a positive difference indicates potential for regression.
193-
lambda x, y: statistics.mean(y) - statistics.mean(x),
194-
vectorized=False,
195-
n_resamples=n_resamples,
196-
)
197-
198-
199170
@with_filelock
200171
def git_clone(clone_path, commitish):
201172
"""Clone the repository at `commit`.

tools/ab_test.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,15 @@
2525
import sys
2626
from collections import defaultdict
2727
from pathlib import Path
28+
from typing import List
29+
30+
import scipy
2831

2932
# Hack to be able to use our test framework code
3033
sys.path.append(str(Path(__file__).parent.parent / "tests"))
3134

3235
# pylint:disable=wrong-import-position
33-
from framework.ab_test import binary_ab_test, check_regression
36+
from framework.ab_test import binary_ab_test
3437
from framework.properties import global_props
3538
from host_tools.metrics import get_metrics_logger
3639

@@ -215,6 +218,32 @@ def collect_data(tag: str, binary_dir: Path, pytest_opts: str):
215218
return load_data_series(Path(test_path))
216219

217220

221+
def check_regression(
222+
a_samples: List[float], b_samples: List[float], *, n_resamples: int = 9999
223+
):
224+
"""Checks for a regression by performing a permutation test. A permutation test is a non-parametric test that takes
225+
three parameters: Two populations (sets of samples) and a function computing a "statistic" based on two populations.
226+
First, the test computes the statistic for the initial populations. It then randomly
227+
permutes the two populations (e.g. merges them and then randomly splits them again). For each such permuted
228+
population, the statistic is computed. Then, all the statistics are sorted, and the percentile of the statistic for the
229+
initial populations is computed. We then look at the fraction of statistics that are larger/smaller than that of the
230+
initial populations. The minimum of these two fractions will then become the p-value.
231+
232+
The idea is that if the two populations are indeed drawn from the same distribution (e.g. if performance did not
233+
change), then permuting will not affect the statistic (indeed, it should be approximately normal-distributed, and
234+
the statistic for the initial populations will be somewhere "in the middle").
235+
236+
Useful for performance tests.
237+
"""
238+
return scipy.stats.permutation_test(
239+
(a_samples, b_samples),
240+
# Compute the difference of means, such that a positive difference indicates potential for regression.
241+
lambda x, y: statistics.mean(y) - statistics.mean(x),
242+
vectorized=False,
243+
n_resamples=n_resamples,
244+
)
245+
246+
218247
def analyze_data(
219248
data_a,
220249
data_b,

0 commit comments

Comments
 (0)