@@ -251,11 +251,14 @@ def interpret_ks_test(base, new, pvalue_threshold=PVALUE_THRESHOLD):
251251
252252def mann_whitney_pval_significance (mann_pvalue , pvalue_threshold = PVALUE_THRESHOLD ):
253253 p_value_interpretation = None
254+ is_significant = False
255+
254256 if mann_pvalue > pvalue_threshold :
255257 p_value_interpretation = "not significant"
256258 if mann_pvalue <= pvalue_threshold :
259+ is_significant = True
257260 p_value_interpretation = "significant"
258- return p_value_interpretation
261+ return p_value_interpretation , is_significant
259262
260263
261264# Mann-Whitney U test
@@ -268,71 +271,61 @@ def interpret_mann_whitneyu(base, new, pvalue_threshold=PVALUE_THRESHOLD):
268271 mann_stat = float (mann_stat ) if mann_stat else None
269272 mann_pvalue = float (mann_pvalue ) if mann_pvalue else None
270273 # Mann-Whitney U p-value interpretation
271- p_value_interpretation = mann_whitney_pval_significance (mann_pvalue , pvalue_threshold )
274+ p_value_interpretation , is_significant = mann_whitney_pval_significance (
275+ mann_pvalue , pvalue_threshold
276+ )
272277
273278 mann_whitney = {
274279 "test_name" : "Mann-Whitney U" ,
275280 "stat" : mann_stat ,
276281 "pvalue" : mann_pvalue ,
277282 "interpretation" : p_value_interpretation ,
278283 }
279- return mann_whitney , mann_stat , mann_pvalue
284+ return mann_whitney , mann_stat , mann_pvalue , is_significant
280285
281286
282287# https://openpublishing.library.umass.edu/pare/article/1977/galley/1980/view/
283288def interpret_effect_size (delta ):
289+ is_effect_meaningful = False
284290 if delta is None :
285- return "Effect cannot be interpreted"
291+ return "Effect cannot be interpreted" , is_effect_meaningful
286292 if abs (delta ) < 0.15 :
287- return "negligible"
288- elif abs (delta ) < 0.33 :
289- return "small"
290- elif abs (delta ) < 0.47 :
291- return "moderate"
293+ return "negligible" , is_effect_meaningful
294+ if abs (delta ) < 0.33 :
295+ is_effect_meaningful = True
296+ return "small" , is_effect_meaningful
297+ if abs (delta ) < 0.47 :
298+ is_effect_meaningful = True
299+ return "moderate" , is_effect_meaningful
292300 else :
293- return "large"
301+ is_effect_meaningful = True
302+ return "large" , is_effect_meaningful
294303
295304
296305def interpret_cles_direction (cles , pvalue_threshold = PVALUE_THRESHOLD ):
297- greater_rev = None
306+ is_base_greater = None
298307 if cles is None :
299- return "CLES cannot be interpreted" , greater_rev
300- if cles > pvalue_threshold :
301- greater_rev = "base"
302- return f"{ cles :.0%} chance a base value > a new value" , greater_rev
303- if cles < pvalue_threshold :
304- greater_rev = "new"
305- return f"{ 1 - cles :.0%} chance a new value > base value" , greater_rev
306- return "CLES cannot be interpreted" , greater_rev
307-
308-
309- def is_new_better (c_delta , cles , mann_pvalue , lower_is_better , pvalue_threshold = PVALUE_THRESHOLD ):
310- """This method takes in CLES to measure if meaningful, Mann Whitney p-val for significance as well as Cliff's Delta for change"""
308+ return "CLES cannot be interpreted" , is_base_greater
309+ elif cles > pvalue_threshold :
310+ is_base_greater = True
311+ return f"{ cles :.0%} chance a base value > a new value" , is_base_greater
312+ elif cles < pvalue_threshold :
313+ is_base_greater = False
314+ return f"{ 1 - cles :.0%} chance a new value > base value" , is_base_greater
315+ return "CLES cannot be interpreted" , is_base_greater
316+
317+
318+ def is_new_better (is_effect_meaningful , is_base_greater , is_significant , lower_is_better ):
311319 # Possibility Base > than New with a small amount or more significance
312- cles_interpretation , greater_rev = interpret_cles_direction (
313- cles , pvalue_threshold = PVALUE_THRESHOLD
314- )
315- effect_size = interpret_effect_size (c_delta )
316- effect_value_significance = ["small" , "moderate" , "large" ]
317- p_value_interpretation = mann_whitney_pval_significance (mann_pvalue , pvalue_threshold )
318-
319- if (
320- greater_rev == "base"
321- and any (effect_size in effect_value_significance )
322- and p_value_interpretation == "significant"
323- ):
320+ if is_base_greater and is_effect_meaningful and is_significant :
324321 if lower_is_better :
325322 is_new_better = True
326323 direction = "improvement"
327324 else :
328325 is_new_better = False
329326 direction = "regression"
330327 # Possibility New > Base with a small amount or more significance
331- if (
332- greater_rev == "new"
333- and any (effect_size in effect_value_significance )
334- and p_value_interpretation == "significant"
335- ):
328+ elif (is_base_greater is False ) and is_effect_meaningful and is_significant :
336329 if lower_is_better :
337330 is_new_better = False
338331 direction = "regression"
@@ -377,13 +370,11 @@ def interpret_performance_direction(ci_low, ci_high, lower_is_better):
377370# Common Language Effect Size, and its interpretation in english
378371def interpret_cles (
379372 mann_stat ,
380- mann_pvalue ,
381373 new_revision ,
382374 base_revision ,
383375 delta ,
384376 interpretation ,
385377 lower_is_better ,
386- pvalue_threshold = PVALUE_THRESHOLD ,
387378):
388379 try :
389380 cles = None
@@ -403,9 +394,8 @@ def interpret_cles(
403394 else :
404395 mann_whitney_u_cles = ""
405396
406- is_significant = False if mann_pvalue > pvalue_threshold else True
407397 # Generate CLES explanation
408- cles_explanation = interpret_cles_direction (cles ) if cles else ""
398+ cles_explanation , is_base_greater = interpret_cles_direction (cles ) if cles else "" , None
409399 # Cliff's delta CLES
410400 cliffs_delta_cles = f"Cliff's Delta: { delta :.2f} → { interpretation } " if delta else ""
411401
@@ -419,10 +409,10 @@ def interpret_cles(
419409 return (
420410 cles_obj ,
421411 cles ,
422- is_significant ,
423412 cles_explanation ,
424413 mann_whitney_u_cles ,
425414 cliffs_delta_cles ,
415+ is_base_greater ,
426416 )
427417 except Exception :
428418 return None , None , None , None , None , None
@@ -487,22 +477,35 @@ def interpret_silverman_kde(base_data, new_data, lower_is_better):
487477 modes = []
488478 base_intervals , base_peak_xs = find_mode_interval (x_base , y_base , base_peak_locs )
489479 new_intervals , new_peak_xs = find_mode_interval (x_new , y_new , new_peak_locs )
490-
491- if base_mode_count == new_mode_count :
492- per_mode_new = split_per_mode (new_data , new_intervals )
493- per_mode_base = split_per_mode (base_data , base_intervals )
494-
495- for i , interval in enumerate (base_intervals ):
496- tup = interval
497- if len (tup ) != 2 :
498- return None , None , None , None , None , None
499-
500- start , end = tup
501- shift = 0
502- ci_low = 0
503- ci_high = 0
504- median_shift_summary = None
505- mode_name = f"Mode { i + 1 } "
480+ for i , interval in enumerate (base_intervals ):
481+ tup = interval
482+ if len (tup ) != 2 :
483+ return None , None , None , None , None , None
484+
485+ start , end = tup
486+ shift = 0
487+ ci_low = 0
488+ ci_high = 0
489+ median_shift_summary = (
490+ "Cannot measure shift, base mode count not equal to new mode count"
491+ )
492+ shift = None
493+ mode_name = f"Mode { i + 1 } "
494+ mode_info = {
495+ "mode_name" : mode_name ,
496+ "mode_start" : f"{ start :.2f} " if start else None ,
497+ "mode_end" : f"{ end :.2f} " if end else None ,
498+ "median_shift_summary" : median_shift_summary ,
499+ "ci_low" : ci_low ,
500+ "ci_high" : ci_high ,
501+ "shift" : shift ,
502+ "shift_summary" : performance_intepretation ,
503+ "ci_warning" : ci_warning ,
504+ }
505+
506+ if base_mode_count == new_mode_count :
507+ per_mode_new = split_per_mode (new_data , new_intervals )
508+ per_mode_base = split_per_mode (base_data , base_intervals )
506509
507510 try :
508511 ref_vals = [val for val , mode in zip (base_data , per_mode_base ) if mode == i ]
@@ -540,33 +543,7 @@ def interpret_silverman_kde(base_data, new_data, lower_is_better):
540543 "shift_summary" : performance_intepretation ,
541544 "ci_warning" : ci_warning ,
542545 }
543- modes .append (mode_info )
544- else :
545- for i , interval in enumerate (base_intervals ):
546- tup = interval
547- if len (tup ) != 2 :
548- return None , None , None , None , None , None
549-
550- start , end = tup
551- shift = 0
552- ci_low = 0
553- ci_high = 0
554- median_shift_summary = (
555- "Cannot measure shift, base mode count not equal to new mode count."
556- )
557- shift = None
558- mode_name = f"Mode { i + 1 } "
559- mode_info = {
560- "mode_name" : mode_name ,
561- "mode_start" : f"{ start :.2f} " if start else None ,
562- "mode_end" : f"{ end :.2f} " if end else None ,
563- "median_shift_summary" : median_shift_summary ,
564- "ci_low" : ci_low ,
565- "ci_high" : ci_high ,
566- "shift" : shift ,
567- "shift_summary" : performance_intepretation ,
568- "ci_warning" : ci_warning ,
569- }
546+
570547 modes .append (mode_info )
571548
572549 silverman_kde = {
0 commit comments