@@ -277,6 +277,7 @@ def analyze_data(
277277 )
278278
279279 messages = []
280+ fails = []
280281 for dimension_set , metric , result , unit in failures :
281282 # Sanity check as described above
282283 if abs (statistics .mean (relative_changes_by_metric [metric ])) <= noise_threshold :
@@ -291,18 +292,30 @@ def analyze_data(
291292 old_mean = statistics .mean (processed_emf_a [dimension_set ][metric ][0 ])
292293 new_mean = statistics .mean (processed_emf_b [dimension_set ][metric ][0 ])
293294
295+ change_unit = format_with_reduced_unit (result .statistic , unit )
296+ change_p = result .statistic / old_mean
297+ old_unit = format_with_reduced_unit (old_mean , unit )
298+ new_unit = format_with_reduced_unit (new_mean , unit )
299+
300+ fail = dict (dimension_set )
301+ fail ["diff" ] = change_p
302+ fails .append (fail )
303+
294304 msg = (
295305 f"\033 [0;32m[Firecracker A/B-Test Runner]\033 [0m A/B-testing shows a change of "
296- f"{ format_with_reduced_unit ( result . statistic , unit ) } , or { result . statistic / old_mean :.2%} , "
297- f"(from { format_with_reduced_unit ( old_mean , unit ) } to { format_with_reduced_unit ( new_mean , unit ) } ) "
306+ f"{ change_unit } , or { change_p :.2%} , "
307+ f"(from { old_unit } to { new_unit } ) "
298308 f"for metric \033 [1m{ metric } \033 [0m with \033 [0;31m\033 [1mp={ result .pvalue } \033 [0m. "
299309 f"This means that observing a change of this magnitude or worse, assuming that performance "
300310 f"characteristics did not change across the tested commits, has a probability of { result .pvalue :.2%} . "
301311 f"Tested Dimensions:\n { json .dumps (dict (dimension_set ), indent = 2 , sort_keys = True )} "
302312 )
303313 messages .append (msg )
304314
305- assert not messages , "\n " + "\n " .join (messages )
315+ if messages :
316+ with open ("test_results/ab.json" , "w" ) as f :
317+ json .dump ({"fails" : fails }, f , indent = 2 , sort_keys = True )
318+ assert False , "\n " + "\n " .join (messages )
306319 print ("No regressions detected!" )
307320
308321
0 commit comments