WIP: add result parsing and false-positive analysis script

Essoz · Essoz · commit e2d94d399138 · 2025-02-23T15:41:06.000-05:00
diff --git a/eval_scripts/false_positive/analyze_results.py b/eval_scripts/false_positive/analyze_results.py
@@ -0,0 +1,79 @@
+import os
+
+import yaml
+from run_exp_for_class import EXPS, get_checker_output_dir, get_setup_key
+
+from mldaikon.checker import parse_checker_results
+from mldaikon.invariant.base_cls import read_inv_file
+
+
+def discover_checker_results() -> dict:
+    """Map each setup key to the (program, checker output dir) pairs found on disk.
+
+    Must be called with the benchmark directory as the current working
+    directory: ``setups.yml`` and ``validset`` are both opened relative to it.
+    Every sub-directory of ``validset`` other than ``data`` is treated as a
+    program. A setup/program pair whose checker output directory does not
+    exist is reported on stdout and left out, so a setup without any output
+    has no entry in the returned dict.
+    """
+    with open("setups.yml", "r") as f:
+        config = yaml.load(f, Loader=yaml.FullLoader)
+
+    programs = []
+    for entry in os.listdir("validset"):
+        if entry != "data" and os.path.isdir(os.path.join("validset", entry)):
+            programs.append(entry)
+
+    found: dict = {}  # setup key -> [(program, checker_output_dir), ...]
+    for setup in config["setups"]:
+        for program in programs:
+            checker_output_dir = get_checker_output_dir(setup, program)
+            if not os.path.exists(checker_output_dir):
+                print(
+                    f"Warning: checker output directory for {program} in {setup} does not exist, skipping. {checker_output_dir}"
+                )
+                continue
+            found.setdefault(get_setup_key(setup), []).append(
+                (program, checker_output_dir)
+            )
+    return found
+
+
+def _find_result_file(output_dir: str, file_names: list[str], prefix: str) -> str:
+    """Return the path of the first entry of file_names that starts with prefix.
+
+    Raises:
+        FileNotFoundError: If no entry starts with prefix.
+    """
+    for file_name in file_names:
+        if file_name.startswith(prefix):
+            return os.path.join(output_dir, file_name)
+    raise FileNotFoundError(
+        f"No file starting with '{prefix}' found in {output_dir}"
+    )
+
+
+def _ratio(count: int, total: int) -> float:
+    """Return count / total, or 0.0 when total is zero."""
+    return count / total if total else 0.0
+
+
+if __name__ == "__main__":
+    # Each benchmark is processed from inside its own directory. Remember the
+    # starting directory as an absolute path so we can always return to it,
+    # even when a benchmark fails half-way through.
+    root_dir = os.getcwd()
+
+    # Pass 1: collect, per benchmark, the checker output directories that exist.
+    all_results = {}
+    for bench in EXPS:
+        os.chdir(bench)
+        try:
+            results = discover_checker_results()
+        finally:
+            os.chdir(root_dir)
+        if results:
+            all_results[bench] = results
+
+    # Pass 2: for each bench, for each setup, for each program, parse the results.
+    for bench, setups in all_results.items():
+        os.chdir(bench)
+        try:
+            for setup, programs in setups.items():
+                for program, checker_output_dir in programs:
+                    # Locate the invariant file and the three result files.
+                    file_names = os.listdir(checker_output_dir)
+                    if "invariants.json" not in file_names:
+                        raise FileNotFoundError(
+                            f"invariants.json not found in {checker_output_dir}"
+                        )
+                    inv_file = os.path.join(checker_output_dir, "invariants.json")
+                    failed_file = _find_result_file(
+                        checker_output_dir, file_names, "failed"
+                    )
+                    passed_file = _find_result_file(
+                        checker_output_dir, file_names, "passed"
+                    )
+                    not_triggered_file = _find_result_file(
+                        checker_output_dir, file_names, "not_triggered"
+                    )
+
+                    # Parse everything and make sure the three result sets
+                    # together account for every invariant.
+                    failed = parse_checker_results(failed_file)
+                    passed = parse_checker_results(passed_file)
+                    non_triggered = parse_checker_results(not_triggered_file)
+                    invariants = read_inv_file(inv_file)
+                    total = len(invariants)
+                    counted = len(failed) + len(passed) + len(non_triggered)
+                    if counted != total:
+                        raise ValueError(
+                            f"Result count mismatch in {checker_output_dir}: "
+                            f"{counted} results for {total} invariants"
+                        )
+
+                    # _ratio() keeps an empty invariant file from raising
+                    # ZeroDivisionError; the ratios are then reported as 0.0.
+                    print(
+                        f"Results for {program} in {setup} in {bench}, failed: {len(failed)} ({_ratio(len(failed), total)}), passed: {len(passed)} ({_ratio(len(passed), total)}), non_triggered: {len(non_triggered)} ({_ratio(len(non_triggered), total)})"
+                    )
+        finally:
+            os.chdir(root_dir)
diff --git a/eval_scripts/false_positive/run_exp_for_class.py b/eval_scripts/false_positive/run_exp_for_class.py
@@ -5,8 +5,7 @@
 
 import yaml
 
-EXPS = os.listdir(".")
-EXPS = [exp for exp in EXPS if os.path.isdir(exp)]
+EXPS = ["CNN", "RNN", "Transformers"]
 
 # get the current time (just date and HH:MM)
 READY_TRACES: list[str] = []
diff --git a/mldaikon/checker.py b/mldaikon/checker.py
@@ -13,6 +13,23 @@
 register_custom_excepthook()
 
 
+def parse_checker_results(file_name: str) -> list[dict]:
+    """Load every JSON document stored back-to-back in a checker result file.
+
+    The file is read as zero or more JSON values concatenated one after
+    another, each possibly spanning several lines. Documents are decoded
+    sequentially rather than split on lines that start with "{", so leading
+    blank lines, a whitespace-only file, several documents on one line, and
+    nested objects that begin a line at column 0 are all handled.
+
+    Args:
+        file_name: Path of the result file to read.
+
+    Returns:
+        The decoded documents in file order; an empty list when the file
+        holds nothing but whitespace.
+
+    Raises:
+        json.JSONDecodeError: If the file contains malformed JSON.
+    """
+    with open(file_name, "r") as f:
+        content = f.read()
+
+    decoder = json.JSONDecoder()
+    all_results: list[dict] = []
+    end = len(content)
+    pos = 0
+    while pos < end:
+        # raw_decode() does not accept leading whitespace, so step over it.
+        if content[pos].isspace():
+            pos += 1
+            continue
+        document, pos = decoder.raw_decode(content, pos)
+        all_results.append(document)
+    return all_results
+
+
 def check_engine(
     traces: list[Trace], invariants: list[Invariant], check_relation_first: bool
 ) -> list[CheckerResult]:
diff --git a/mldaikon/invariant/base_cls.py b/mldaikon/invariant/base_cls.py
@@ -69,7 +69,7 @@ def load_function_obj(func_name: str) -> Any:
         return func_obj
     except Exception as e:
         logger = logging.getLogger(__name__)
-        logger.warning(
+        logger.debug(
             f"Failed to load the object for the function: {func_name}, error: {e}"
         )
         return None
@@ -89,7 +89,7 @@ def load_function_signature(func_name: str) -> inspect.Signature | None:
         return FUNC_SIGNATURE_OBJS[func_name]
     except Exception as e:
         logger = logging.getLogger(__name__)
-        logger.warning(
+        logger.debug(
             f"Failed to load the signature for the function: {func_name}, error: {e}"
         )
         FUNC_SIGNATURE_OBJS[func_name] = (
@@ -129,7 +129,7 @@ def __init__(
         self.signature = load_function_signature(func_name)
         if self.signature is None:
             logger = logging.getLogger(__name__)
-            logger.warning(
+            logger.debug(
                 f"Failed to load the signature for the function: {func_name}, can only work on kwargs."
             )
             self.arguments = kwargs.copy()